Update data
This commit is contained in:
parent
a4103b0896
commit
273d7a83ee
15 changed files with 716 additions and 316 deletions
|
|
@ -10,7 +10,11 @@ import pyarrow as pa
|
|||
import pyarrow.csv as pa_csv
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from ..utils import fuzzy_join_on_postcode
|
||||
from ..utils import (
|
||||
fuzzy_join_on_postcode,
|
||||
normalize_address_key,
|
||||
normalize_postcode_key,
|
||||
)
|
||||
|
||||
|
||||
pl.Config.set_tbl_cols(-1)
|
||||
|
|
@ -193,12 +197,15 @@ def main():
|
|||
|
||||
|
||||
def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Path):
|
||||
epc_base = _scan_epc_certificates(epc_path, temp_dir)
|
||||
epc_base = _scan_epc_certificates(epc_path, temp_dir).with_columns(
|
||||
normalize_address_key(pl.col("epc_address")).alias("_epc_match_address"),
|
||||
normalize_postcode_key(pl.col("epc_postcode")).alias("_epc_match_postcode"),
|
||||
)
|
||||
|
||||
# Dedup fork: keep latest certificate per property (existing logic)
|
||||
epc = (
|
||||
epc_base.sort("inspection_date", descending=True)
|
||||
.group_by("epc_address", "epc_postcode")
|
||||
.group_by("_epc_match_address", "_epc_match_postcode")
|
||||
.first()
|
||||
.drop("tenure")
|
||||
)
|
||||
|
|
@ -216,15 +223,15 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.with_columns(
|
||||
pl.col("number_habitable_rooms")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_rooms"),
|
||||
pl.col("total_floor_area")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_area"),
|
||||
pl.col("_rating_rank")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_rating_rank"),
|
||||
)
|
||||
.with_columns(
|
||||
|
|
@ -257,7 +264,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.cast(pl.Int32)
|
||||
.alias("_event_year"),
|
||||
)
|
||||
.group_by("epc_address", "epc_postcode")
|
||||
.group_by("_epc_match_address", "_epc_match_postcode")
|
||||
.agg(
|
||||
pl.struct(
|
||||
pl.col("_event_year").alias("year"),
|
||||
|
|
@ -276,7 +283,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
# Social tenure fork: flag properties that were ever social housing
|
||||
social_tenure = (
|
||||
epc_base.filter(pl.col("tenure").str.to_lowercase().str.contains("social"))
|
||||
.select("epc_address", "epc_postcode")
|
||||
.select("_epc_match_address", "_epc_match_postcode")
|
||||
.unique()
|
||||
.with_columns(pl.lit("Yes").alias("was_council_house"))
|
||||
.collect()
|
||||
|
|
@ -287,12 +294,12 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
epc = (
|
||||
epc.join(
|
||||
events.lazy(),
|
||||
on=["epc_address", "epc_postcode"],
|
||||
on=["_epc_match_address", "_epc_match_postcode"],
|
||||
how="left",
|
||||
)
|
||||
.join(
|
||||
social_tenure.lazy(),
|
||||
on=["epc_address", "epc_postcode"],
|
||||
on=["_epc_match_address", "_epc_match_postcode"],
|
||||
how="left",
|
||||
)
|
||||
.with_columns(
|
||||
|
|
@ -339,9 +346,23 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
ignore_nulls=True,
|
||||
).alias("pp_address"),
|
||||
)
|
||||
.with_columns(
|
||||
normalize_address_key(pl.col("pp_address")).alias("_pp_match_address"),
|
||||
normalize_postcode_key(pl.col("postcode")).alias("_pp_match_postcode"),
|
||||
)
|
||||
.filter(pl.col("_pp_match_postcode").is_not_null())
|
||||
.with_columns(
|
||||
pl.coalesce("_pp_match_address", "pp_address").alias("_pp_group_address"),
|
||||
pl.col("_pp_match_postcode").alias("_pp_group_postcode"),
|
||||
)
|
||||
.filter(pl.col("pp_address").is_not_null())
|
||||
.sort("date_of_transfer")
|
||||
.group_by("pp_address", "postcode", maintain_order=True)
|
||||
.group_by("_pp_group_address", "_pp_group_postcode", maintain_order=True)
|
||||
.agg(
|
||||
pl.col("pp_address").last(),
|
||||
pl.col("postcode").last(),
|
||||
pl.col("_pp_match_address").last(),
|
||||
pl.col("_pp_match_postcode").last(),
|
||||
pl.struct(
|
||||
pl.col("date_of_transfer").dt.year().alias("year"),
|
||||
pl.col("date_of_transfer").dt.month().cast(pl.UInt8).alias("month"),
|
||||
|
|
@ -354,7 +375,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
pl.col("date_of_transfer").first().alias("first_transfer_date"),
|
||||
pl.col("old_new").first(),
|
||||
)
|
||||
).filter(pl.col("pp_address").is_not_null())
|
||||
)
|
||||
|
||||
print("Price paid dataset")
|
||||
print(price_paid.head().collect())
|
||||
|
|
@ -405,7 +426,19 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.then(pl.lit(1, dtype=pl.UInt8))
|
||||
.otherwise(pl.lit(None, dtype=pl.UInt8))
|
||||
.alias("is_construction_date_approximate"),
|
||||
).drop("old_new", "first_transfer_date")
|
||||
).drop(
|
||||
[
|
||||
"old_new",
|
||||
"first_transfer_date",
|
||||
"_pp_match_address",
|
||||
"_pp_match_postcode",
|
||||
"_pp_group_address",
|
||||
"_pp_group_postcode",
|
||||
"_epc_match_address",
|
||||
"_epc_match_postcode",
|
||||
],
|
||||
strict=False,
|
||||
)
|
||||
|
||||
joined = joined.rename({col: col.lower() for col in joined.columns})
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ _AREA_COLUMNS = [
|
|||
"Postcode",
|
||||
"lat",
|
||||
"lon",
|
||||
# Runtime provenance for deciding whether missing coordinates are skippable.
|
||||
"ctry25cd",
|
||||
# Deprivation
|
||||
"Income Score",
|
||||
"Employment Score",
|
||||
|
|
@ -86,6 +88,15 @@ _AREA_COLUMNS = [
|
|||
_DYNAMIC_POI_DISTANCE_RE = re.compile(r"^Distance to nearest amenity \(.+\) \(km\)$")
|
||||
_DYNAMIC_POI_COUNT_RE = re.compile(r"^Number of amenities \(.+\) within (2|5)km$")
|
||||
TREE_DENSITY_FEATURE = "Street tree density percentile"
|
||||
_POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
|
||||
r"^Tree canopy density percentile within \d+m$"
|
||||
)
|
||||
_RENT_SOURCE_UNAVAILABLE_LADS = {
|
||||
# ONS PIPR does not publish LAD-level private-rent estimates for these
|
||||
# small authorities. Keep rent null there, but fail on any other LAD miss.
|
||||
"E06000053": "Isles of Scilly",
|
||||
"E09000001": "City of London",
|
||||
}
|
||||
|
||||
|
||||
def _is_dynamic_poi_metric_column(column: str) -> bool:
|
||||
|
|
@ -112,6 +123,107 @@ def _less_deprived_percentile_expr(column: str) -> pl.Expr:
|
|||
)
|
||||
|
||||
|
||||
def _tree_density_by_postcode(tree_density_postcodes_path: Path) -> pl.LazyFrame:
    """Lazily load one tree-density value per postcode from a parquet file.

    The density is read from the column named ``TREE_DENSITY_FEATURE`` when
    the file has one. Otherwise the file must contain exactly one column
    matching ``_POSTCODE_TREE_DENSITY_PERCENTILE_RE``, which is used instead.
    Either way the output column is cast to Float32 and named
    ``TREE_DENSITY_FEATURE``.

    Args:
        tree_density_postcodes_path: Parquet file with a ``postcode`` column
            and a density column as described above.

    Returns:
        A LazyFrame with ``postcode`` and ``TREE_DENSITY_FEATURE``, with null
        postcodes dropped and one row kept per postcode.

    Raises:
        ValueError: If ``postcode`` is absent, or if no usable density column
            can be picked unambiguously.
    """
    lf = pl.scan_parquet(tree_density_postcodes_path)
    available = set(lf.collect_schema().names())
    if "postcode" not in available:
        raise ValueError(
            f"{tree_density_postcodes_path} is missing required column: postcode"
        )

    # Prefer the canonical feature name; only fall back to the radius-specific
    # percentile column when the canonical one is not present.
    source_column = TREE_DENSITY_FEATURE
    if source_column not in available:
        matches = sorted(
            name
            for name in available
            if _POSTCODE_TREE_DENSITY_PERCENTILE_RE.match(name)
        )
        if len(matches) != 1:
            raise ValueError(
                f'{tree_density_postcodes_path} must contain column "{TREE_DENSITY_FEATURE}" '
                'or exactly one "Tree canopy density percentile within {radius}m" column; '
                f"found {len(matches)} postcode percentile columns"
            )
        (source_column,) = matches

    density = pl.col(source_column).cast(pl.Float32).alias(TREE_DENSITY_FEATURE)
    per_postcode = lf.select(pl.col("postcode"), density)
    return per_postcode.drop_nulls(["postcode"]).unique(["postcode"])
|
||||
|
||||
|
||||
def _validate_lad_source_coverage(
    iod_path: Path, ethnicity_path: Path, rental_prices_path: Path
) -> None:
    """Check that ethnicity and rental sources cover every 2024 LAD.

    The distinct LAD codes in the parquet at ``iod_path`` are the reference
    list. Any reference LAD absent from the ethnicity file is an error. A
    reference LAD absent from the rental file is an error unless its code is
    a key of ``_RENT_SOURCE_UNAVAILABLE_LADS``; those are only reported.

    Args:
        iod_path: Parquet with the 2024 LAD code and name columns.
        ethnicity_path: Parquet with a ``Geography_code`` column.
        rental_prices_path: Parquet with an ``area_code`` column.

    Raises:
        ValueError: If the ethnicity file misses any reference LAD, or the
            rental file misses a LAD that is not an allowed exception.
    """
    lad_code_col = "Local Authority District code (2024)"
    lad_name_col = "Local Authority District name (2024)"

    reference = pl.read_parquet(iod_path, columns=[lad_code_col, lad_name_col])
    reference = reference.rename({lad_code_col: "lad", lad_name_col: "lad_name"})
    reference = reference.unique(["lad"])

    # Ethnicity is checked (and may raise) before the rental file is opened.
    ethnicity = pl.read_parquet(ethnicity_path, columns=["Geography_code"])
    ethnicity = ethnicity.rename({"Geography_code": "lad"})
    no_ethnicity = reference.join(ethnicity, on="lad", how="anti").sort("lad")
    if no_ethnicity.height > 0:
        raise ValueError(
            "Ethnicity data is missing 2024 LAD coverage: "
            f"{no_ethnicity.to_dicts()}"
        )

    rental = pl.read_parquet(rental_prices_path, columns=["area_code"])
    rental = rental.rename({"area_code": "lad"})
    no_rent = reference.join(rental, on="lad", how="anti").sort("lad")

    # Only LADs outside the known source-unavailable set are fatal.
    exempt_codes = list(_RENT_SOURCE_UNAVAILABLE_LADS)
    unexplained = no_rent.filter(~pl.col("lad").is_in(exempt_codes))
    if unexplained.height > 0:
        raise ValueError(
            "Rental data is missing 2024 LAD coverage: "
            f"{unexplained.to_dicts()}"
        )

    # Anything still missing here is an allowed exception; log it so the
    # resulting null rents are not a surprise.
    if no_rent.height > 0:
        print(
            "PIPR has no LAD-level rent estimates for source-unavailable LADs; "
            f"rent will remain null there: {no_rent.to_dicts()}"
        )
|
||||
|
||||
|
||||
def _validate_property_postcodes(df: pl.DataFrame) -> None:
    """Fail if any row has a null or blank ``Postcode``.

    Args:
        df: Frame containing a ``Postcode`` column.

    Raises:
        ValueError: If at least one row has a null postcode or one that is
            empty after stripping. The message carries the row count and up
            to ten sample rows, limited to whichever of the identifying
            columns are present in ``df``.
    """
    postcode = pl.col("Postcode")
    is_blank = postcode.cast(pl.Utf8).str.strip_chars() == ""
    bad_rows = df.filter(postcode.is_null() | is_blank)

    n_bad = bad_rows.height
    if not n_bad:
        return

    # Restrict the sample to identifying columns that actually exist.
    preferred = ("Postcode", "Address per Property Register", "Last known price")
    present = [name for name in preferred if name in bad_rows.columns]
    examples = bad_rows.select(present).head(10).to_dicts()
    raise ValueError(
        "Property rows missing a postcode after merge: "
        f"{n_bad} rows. Sample: {examples}"
    )
|
||||
|
||||
|
||||
def _build(
|
||||
epc_pp_path: Path,
|
||||
arcgis_path: Path,
|
||||
|
|
@ -126,12 +238,14 @@ def _build(
|
|||
lsoa_population_path: Path,
|
||||
median_age_path: Path,
|
||||
election_results_path: Path,
|
||||
tree_density_addresses_path: Path | None = None,
|
||||
tree_density_postcodes_path: Path | None = None,
|
||||
) -> tuple[pl.DataFrame, pl.DataFrame]:
|
||||
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
|
||||
|
||||
Returns (postcode_df, properties_df).
|
||||
"""
|
||||
_validate_lad_source_coverage(iod_path, ethnicity_path, rental_prices_path)
|
||||
|
||||
wide = pl.scan_parquet(epc_pp_path).filter(
|
||||
pl.col("total_floor_area").is_null()
|
||||
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
|
||||
|
|
@ -152,9 +266,15 @@ def _build(
|
|||
.drop("new_postcode")
|
||||
)
|
||||
|
||||
arcgis_raw = pl.scan_parquet(arcgis_path)
|
||||
postcode_country = arcgis_raw.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
pl.col("ctry25cd"),
|
||||
).unique(["postcode"])
|
||||
wide = wide.join(postcode_country, on="postcode", how="left")
|
||||
|
||||
arcgis = (
|
||||
pl.scan_parquet(arcgis_path)
|
||||
.filter(pl.col("ctry25cd") == "E92000001") # England only
|
||||
arcgis_raw.filter(pl.col("ctry25cd") == "E92000001") # England only
|
||||
.filter(pl.col("doterm").is_null()) # Active postcodes only
|
||||
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
|
||||
# Alias them back to the short canonical names used across the
|
||||
|
|
@ -191,7 +311,9 @@ def _build(
|
|||
.cast(pl.UInt8)
|
||||
.alias("_bedrooms"),
|
||||
)
|
||||
rental = pl.scan_parquet(rental_prices_path)
|
||||
rental = pl.scan_parquet(rental_prices_path).select(
|
||||
"area_code", "bedrooms", "mean_monthly_rent"
|
||||
)
|
||||
wide = wide.join(
|
||||
rental,
|
||||
left_on=["Local Authority District code (2024)", "_bedrooms"],
|
||||
|
|
@ -260,17 +382,9 @@ def _build(
|
|||
school_proximity = pl.scan_parquet(school_proximity_path)
|
||||
wide = wide.join(school_proximity, on="postcode", how="left")
|
||||
|
||||
if tree_density_addresses_path is not None:
|
||||
tree_density = (
|
||||
pl.scan_parquet(tree_density_addresses_path)
|
||||
.select(
|
||||
pl.col("postcode"),
|
||||
pl.col("pp_address"),
|
||||
pl.col(TREE_DENSITY_FEATURE).cast(pl.Float32),
|
||||
)
|
||||
.unique(["postcode", "pp_address"])
|
||||
)
|
||||
wide = wide.join(tree_density, on=["postcode", "pp_address"], how="left")
|
||||
if tree_density_postcodes_path is not None:
|
||||
tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
|
||||
wide = wide.join(tree_density, on="postcode", how="left")
|
||||
|
||||
# Broadband: derive max available download speed tier per postcode from
|
||||
# Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
|
||||
|
|
@ -415,6 +529,7 @@ def _build(
|
|||
|
||||
print("Collecting with streaming engine...")
|
||||
df = wide.collect(engine="streaming")
|
||||
_validate_property_postcodes(df)
|
||||
|
||||
# Split into postcode-level and property-level dataframes
|
||||
area_cols = [
|
||||
|
|
@ -508,10 +623,10 @@ def main():
|
|||
help="2024 General Election results by constituency parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tree-density-addresses",
|
||||
"--tree-density-postcodes",
|
||||
type=Path,
|
||||
required=False,
|
||||
help="Address-level tree density parquet from pipeline.transform.tree_density",
|
||||
help="Postcode-level tree density parquet from pipeline.transform.tree_density",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-postcodes",
|
||||
|
|
@ -541,7 +656,7 @@ def main():
|
|||
lsoa_population_path=args.lsoa_population,
|
||||
median_age_path=args.median_age,
|
||||
election_results_path=args.election_results,
|
||||
tree_density_addresses_path=args.tree_density_addresses,
|
||||
tree_density_postcodes_path=args.tree_density_postcodes,
|
||||
)
|
||||
|
||||
print(f"\nPostcode columns: {postcode_df.columns}")
|
||||
|
|
|
|||
|
|
@ -227,7 +227,18 @@ def main():
|
|||
fa = test["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
|
||||
|
||||
print("\nComputing kNN estimates...")
|
||||
knn_psm = knn_median_psm(trees, lat, lon, tg)
|
||||
last_sale_dates = (
|
||||
test["input_date"].dt.epoch("d").fill_null(-1).to_numpy().astype(np.int64)
|
||||
)
|
||||
knn_psm = knn_median_psm(
|
||||
trees,
|
||||
lat,
|
||||
lon,
|
||||
tg,
|
||||
postcodes=test["Postcode"].fill_null("").to_numpy(),
|
||||
last_prices=test["input_price"].cast(pl.Float64).to_numpy(),
|
||||
last_sale_dates=last_sale_dates,
|
||||
)
|
||||
|
||||
# Temporal adjustment: pool PSM is at ref, adjust to actual
|
||||
log_idx_actual = test["log_index_actual"].to_numpy().astype(np.float64)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ for lat/lon needed by kNN, then drops those columns before writing.
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.transform.price_estimation.knn import (
|
||||
|
|
@ -28,6 +29,45 @@ from pipeline.transform.price_estimation.utils import (
|
|||
type_group_expr,
|
||||
)
|
||||
|
||||
MAX_KNN_TO_INDEX_RATIO = 2.0
|
||||
MIN_KNN_TO_INDEX_RATIO = 0.5
|
||||
MAX_ESTIMATE_TO_LAST_PRICE_RATIO = 6.0
|
||||
|
||||
|
||||
def guarded_blend_estimates(
    index_est: np.ndarray,
    knn_est: np.ndarray,
    last_prices: np.ndarray,
    weight: float = KNN_BLEND_WEIGHT,
) -> np.ndarray:
    """Blend index and kNN estimates, then cap against the last sale price.

    For each element:

    * If the index estimate is not finite and positive, the result is NaN.
    * If the kNN estimate is finite, positive and within
      ``[MIN_KNN_TO_INDEX_RATIO, MAX_KNN_TO_INDEX_RATIO]`` times the index
      estimate, the result is ``(1 - weight) * index + weight * knn``.
    * Otherwise the index estimate is kept unchanged.

    Finally, where the last price is finite and positive, the result is
    limited to ``last_price * MAX_ESTIMATE_TO_LAST_PRICE_RATIO``.

    Args:
        index_est: Index-based estimates.
        knn_est: kNN-based estimates.
        last_prices: Last sale prices used for the cap.
        weight: Share given to the kNN estimate when blending.

    Returns:
        Float64 array of blended, capped estimates (NaN where no index
        estimate is usable).
    """
    idx = index_est.astype(np.float64, copy=False)
    knn = knn_est.astype(np.float64, copy=False)
    prices = last_prices.astype(np.float64, copy=False)

    index_ok = np.isfinite(idx) & (idx > 0)
    knn_ok = np.isfinite(knn) & (knn > 0)

    # kNN is trusted only when it lands inside the band around the index.
    lower = idx * MIN_KNN_TO_INDEX_RATIO
    upper = idx * MAX_KNN_TO_INDEX_RATIO
    use_knn = knn_ok & index_ok & (knn >= lower) & (knn <= upper)

    index_only = np.where(index_ok, idx, np.nan)
    weighted = (1 - weight) * idx + weight * knn
    blended = np.where(use_knn, weighted, index_only)

    # No cap applies when the last price is missing or non-positive.
    price_ok = np.isfinite(prices) & (prices > 0)
    ceiling = np.where(price_ok, prices * MAX_ESTIMATE_TO_LAST_PRICE_RATIO, np.nan)

    cappable = np.isfinite(ceiling) & np.isfinite(blended)
    return np.where(cappable, np.minimum(blended, ceiling), blended)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
@ -130,36 +170,54 @@ def main():
|
|||
lon = df["lon"].cast(pl.Float64).to_numpy()
|
||||
tg = df["_type_group"].fill_null("").to_numpy()
|
||||
fa = df["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
|
||||
last_prices = (
|
||||
df["Last known price"].cast(pl.Float64).fill_null(float("nan")).to_numpy()
|
||||
)
|
||||
last_sale_dates = (
|
||||
df["Date of last transaction"]
|
||||
.dt.epoch("d")
|
||||
.fill_null(-1)
|
||||
.to_numpy()
|
||||
.astype(np.int64)
|
||||
)
|
||||
|
||||
knn_psm = knn_median_psm(trees, lat, lon, tg)
|
||||
knn_psm = knn_median_psm(
|
||||
trees,
|
||||
lat,
|
||||
lon,
|
||||
tg,
|
||||
postcodes=df["Postcode"].fill_null("").to_numpy(),
|
||||
last_prices=last_prices,
|
||||
last_sale_dates=last_sale_dates,
|
||||
)
|
||||
knn_est = knn_psm * fa # No temporal adj: ref == current
|
||||
|
||||
df = df.with_columns(
|
||||
pl.Series("_knn_est", knn_est, dtype=pl.Float64),
|
||||
)
|
||||
|
||||
# Blend: where kNN available, use weighted average; else keep index
|
||||
# Blend only when kNN is close to the index estimate; otherwise keep index.
|
||||
index_est = (
|
||||
df["Estimated current price"]
|
||||
.cast(pl.Float64)
|
||||
.fill_null(float("nan"))
|
||||
.to_numpy()
|
||||
)
|
||||
blended = guarded_blend_estimates(index_est, knn_est, last_prices)
|
||||
df = df.with_columns(
|
||||
pl.when(
|
||||
pl.col("Estimated current price").is_not_null()
|
||||
& pl.col("_knn_est").is_not_null()
|
||||
& pl.col("_knn_est").is_finite()
|
||||
& (pl.col("_knn_est") > 0)
|
||||
)
|
||||
.then(
|
||||
(1 - KNN_BLEND_WEIGHT) * pl.col("Estimated current price")
|
||||
+ KNN_BLEND_WEIGHT * pl.col("_knn_est")
|
||||
)
|
||||
.when(pl.col("Estimated current price").is_not_null())
|
||||
.then(pl.col("Estimated current price"))
|
||||
.otherwise(pl.lit(None))
|
||||
.alias("Estimated current price"),
|
||||
pl.Series("_index_est", index_est, dtype=pl.Float64),
|
||||
pl.Series("Estimated current price", blended, dtype=pl.Float64),
|
||||
).with_columns(
|
||||
pl.col("Estimated current price").fill_nan(None),
|
||||
)
|
||||
|
||||
n_blended = df.filter(
|
||||
pl.col("_knn_est").is_not_null()
|
||||
& pl.col("_knn_est").is_finite()
|
||||
& (pl.col("_knn_est") > 0)
|
||||
& (pl.col("_index_est").is_not_null())
|
||||
& (pl.col("_knn_est") >= pl.col("_index_est") * MIN_KNN_TO_INDEX_RATIO)
|
||||
& (pl.col("_knn_est") <= pl.col("_index_est") * MAX_KNN_TO_INDEX_RATIO)
|
||||
& pl.col("Estimated current price").is_not_null()
|
||||
).height
|
||||
print(f" kNN blended: {n_blended:,} of {n_estimated:,} estimates")
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ from pipeline.transform.price_estimation.utils import (
|
|||
KNN_K = 20
|
||||
KNN_MIN_NEIGHBORS = 5
|
||||
KNN_BLEND_WEIGHT = 0.35
|
||||
MIN_COMPARABLE_FLOOR_AREA_SQM = 15.0
|
||||
MAX_COMPARABLE_FLOOR_AREA_SQM = 1_000.0
|
||||
MIN_COMPARABLE_PSM = 500.0
|
||||
MAX_COMPARABLE_PSM = 50_000.0
|
||||
|
||||
|
||||
def _scale_coords(lat: np.ndarray, lon: np.ndarray) -> np.ndarray:
|
||||
|
|
@ -33,13 +37,14 @@ def build_knn_pool(
|
|||
index: pl.DataFrame,
|
||||
ref_frac_year: float,
|
||||
max_sale_year: int | None = None,
|
||||
) -> dict[str, tuple[KDTree, np.ndarray]]:
|
||||
) -> dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
|
||||
"""Build per-type_group KD-trees of index-adjusted price-per-sqm.
|
||||
|
||||
Adjusts all pool properties' sale prices to ref_frac_year using the index,
|
||||
then builds a KD-tree per type_group for nearest-neighbor queries.
|
||||
|
||||
Returns dict mapping type_group -> (KDTree over scaled lat/lon, adjusted_psm array).
|
||||
Returns dict mapping type_group to KDTree, adjusted PSM, and sale identity
|
||||
arrays used to keep the target sale out of its own comparable set.
|
||||
"""
|
||||
print("Building kNN pool...")
|
||||
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
||||
|
|
@ -55,7 +60,8 @@ def build_knn_pool(
|
|||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Total floor area (sqm)") >= MIN_COMPARABLE_FLOOR_AREA_SQM,
|
||||
pl.col("Total floor area (sqm)") <= MAX_COMPARABLE_FLOOR_AREA_SQM,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
|
|
@ -97,12 +103,13 @@ def build_knn_pool(
|
|||
).filter(
|
||||
pl.col("_adj_psm").is_not_null(),
|
||||
pl.col("_adj_psm").is_finite(),
|
||||
pl.col("_adj_psm") > 0,
|
||||
pl.col("_adj_psm") >= MIN_COMPARABLE_PSM,
|
||||
pl.col("_adj_psm") <= MAX_COMPARABLE_PSM,
|
||||
)
|
||||
print(f" {len(pool):,} after index adjustment")
|
||||
|
||||
# Build per-type KD-trees
|
||||
trees: dict[str, tuple[KDTree, np.ndarray]] = {}
|
||||
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]] = {}
|
||||
for tg in TYPE_GROUPS:
|
||||
sub = pool.filter(pl.col("type_group") == tg)
|
||||
n = len(sub)
|
||||
|
|
@ -111,19 +118,49 @@ def build_knn_pool(
|
|||
lat = sub["lat"].to_numpy().astype(np.float64)
|
||||
lon = sub["lon"].to_numpy().astype(np.float64)
|
||||
psm = sub["_adj_psm"].to_numpy().astype(np.float64)
|
||||
postcodes = sub["Postcode"].fill_null("").to_numpy()
|
||||
prices = sub["Last known price"].to_numpy().astype(np.float64)
|
||||
sale_dates = (
|
||||
sub["Date of last transaction"]
|
||||
.dt.epoch("d")
|
||||
.fill_null(-1)
|
||||
.to_numpy()
|
||||
.astype(np.int64)
|
||||
)
|
||||
tree = KDTree(_scale_coords(lat, lon))
|
||||
trees[tg] = (tree, psm)
|
||||
trees[tg] = (tree, psm, postcodes, prices, sale_dates)
|
||||
print(f" {tg}: {n:,}")
|
||||
|
||||
return trees
|
||||
|
||||
|
||||
def _sale_identity_matches(
    pool_postcodes: np.ndarray,
    pool_prices: np.ndarray,
    pool_sale_dates: np.ndarray,
    target_postcode: str,
    target_price: float,
    target_sale_date: int,
) -> np.ndarray:
    """Flag pool entries whose sale matches the target's own last sale.

    An entry matches when its postcode equals ``target_postcode``, its sale
    date equals ``target_sale_date`` and its price is finite and within 0.5
    of ``target_price``.

    Args:
        pool_postcodes: Postcode of each pool entry (array or sequence).
        pool_prices: Sale price of each pool entry (array or sequence).
        pool_sale_dates: Integer sale date of each pool entry.
        target_postcode: Postcode of the target; empty means unknown.
        target_price: Last sale price of the target; non-finite means unknown.
        target_sale_date: Integer sale date of the target; negative means
            unknown.

    Returns:
        Boolean array with one element per pool entry. All False when any
        part of the target's sale identity is unknown.
    """
    # Coerce to arrays so plain sequences compare element-wise; with a list,
    # ``pool_postcodes == target_postcode`` would collapse to a single bool.
    pool_postcodes = np.asarray(pool_postcodes)
    pool_prices = np.asarray(pool_prices, dtype=np.float64)
    pool_sale_dates = np.asarray(pool_sale_dates)

    # Without a complete identity for the target nothing can be its own sale.
    if not target_postcode or not np.isfinite(target_price) or target_sale_date < 0:
        return np.zeros(len(pool_postcodes), dtype=bool)

    return (
        (pool_postcodes == target_postcode)
        & np.isfinite(pool_prices)
        # Absolute tolerance only: prices must agree to within 0.5.
        & np.isclose(pool_prices, target_price, rtol=0.0, atol=0.5)
        & (pool_sale_dates == target_sale_date)
    )
|
||||
|
||||
|
||||
def knn_median_psm(
|
||||
trees: dict[str, tuple[KDTree, np.ndarray]],
|
||||
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]],
|
||||
lat: np.ndarray,
|
||||
lon: np.ndarray,
|
||||
type_groups: np.ndarray,
|
||||
k: int = KNN_K,
|
||||
postcodes: np.ndarray | None = None,
|
||||
last_prices: np.ndarray | None = None,
|
||||
last_sale_dates: np.ndarray | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Return median adjusted-PSM of k nearest neighbours for each target.
|
||||
|
||||
|
|
@ -133,21 +170,41 @@ def knn_median_psm(
|
|||
n = len(lat)
|
||||
result = np.full(n, np.nan)
|
||||
|
||||
for tg, (tree, psm) in trees.items():
|
||||
for tg, (tree, psm, pool_postcodes, pool_prices, pool_sale_dates) in trees.items():
|
||||
mask = (type_groups == tg) & np.isfinite(lat) & np.isfinite(lon)
|
||||
idx = np.where(mask)[0]
|
||||
if len(idx) == 0:
|
||||
continue
|
||||
|
||||
actual_k = min(k, len(psm))
|
||||
if actual_k < KNN_MIN_NEIGHBORS:
|
||||
query_k = min(max(k * 2, k + KNN_MIN_NEIGHBORS), len(psm))
|
||||
if query_k < KNN_MIN_NEIGHBORS:
|
||||
continue
|
||||
|
||||
coords = _scale_coords(lat[idx], lon[idx])
|
||||
_, nn_idx = tree.query(coords, k=actual_k)
|
||||
_, nn_idx = tree.query(coords, k=query_k)
|
||||
if nn_idx.ndim == 1:
|
||||
nn_idx = nn_idx.reshape(-1, 1)
|
||||
|
||||
result[idx] = np.nanmedian(psm[nn_idx], axis=1)
|
||||
medians = np.full(len(idx), np.nan)
|
||||
for row_num, target_idx in enumerate(idx):
|
||||
candidates = nn_idx[row_num]
|
||||
if (
|
||||
postcodes is not None
|
||||
and last_prices is not None
|
||||
and last_sale_dates is not None
|
||||
):
|
||||
same_sale = _sale_identity_matches(
|
||||
pool_postcodes[candidates],
|
||||
pool_prices[candidates],
|
||||
pool_sale_dates[candidates],
|
||||
str(postcodes[target_idx] or ""),
|
||||
float(last_prices[target_idx]),
|
||||
int(last_sale_dates[target_idx]),
|
||||
)
|
||||
candidates = candidates[~same_sale]
|
||||
if len(candidates) >= KNN_MIN_NEIGHBORS:
|
||||
medians[row_num] = np.nanmedian(psm[candidates[:k]])
|
||||
|
||||
result[idx] = medians
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ TERRACE_TYPES = [
|
|||
"Terraced",
|
||||
]
|
||||
FLAT_TYPES = ["Flats/Maisonettes"]
|
||||
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"]
|
||||
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats"]
|
||||
SHRINKAGE_K = 50
|
||||
|
||||
|
||||
|
|
@ -30,8 +30,6 @@ def type_group_expr():
|
|||
.then(pl.lit("Terraced"))
|
||||
.when(pl.col("Property type").is_in(FLAT_TYPES))
|
||||
.then(pl.lit("Flats"))
|
||||
.when(pl.col("Property type") == "Bungalow")
|
||||
.then(pl.lit("Bungalow"))
|
||||
.when(pl.col("Property type").is_in(["Detached", "Semi-Detached"]))
|
||||
.then(pl.col("Property type"))
|
||||
.otherwise(pl.lit(None))
|
||||
|
|
@ -61,7 +59,7 @@ def hierarchy_keys(sector: str) -> tuple[str, str]:
|
|||
return district, area
|
||||
|
||||
|
||||
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"]
|
||||
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats"]
|
||||
|
||||
|
||||
def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:
|
||||
|
|
|
|||
|
|
@ -15,12 +15,21 @@ DROP_CATEGORIES = {
|
|||
"amenity/bicycle_parking",
|
||||
"amenity/binoculars",
|
||||
"amenity/boot_scraper",
|
||||
"amenity/bus_garage",
|
||||
"amenity/check_in",
|
||||
"amenity/clock",
|
||||
"amenity/clothes_dryer",
|
||||
"amenity/coast_guard",
|
||||
"amenity/coffin_rest",
|
||||
"amenity/compressed_air",
|
||||
"amenity/court_yard",
|
||||
"amenity/donation_box",
|
||||
"amenity/dressing_room",
|
||||
"amenity/drinking_water",
|
||||
"emergency/water_tank",
|
||||
"leisure/bleachers",
|
||||
"leisure/schoolyard",
|
||||
"public_transport/pay_scale_area",
|
||||
"shop/taxi",
|
||||
"amenity/feeding_place",
|
||||
"amenity/fixme",
|
||||
|
|
@ -31,6 +40,7 @@ DROP_CATEGORIES = {
|
|||
"amenity/lounge",
|
||||
"tourism/preserved_railway",
|
||||
"amenity/lounger",
|
||||
"leisure/sport",
|
||||
"amenity/motorcycle_parking",
|
||||
"amenity/mounting_block",
|
||||
"amenity/notice_board",
|
||||
|
|
@ -71,8 +81,12 @@ DROP_CATEGORIES = {
|
|||
"amenity/boat_storage",
|
||||
"amenity/bureau_de_change",
|
||||
"amenity/bus_station",
|
||||
"amenity/beachhut",
|
||||
"amenity/canteen",
|
||||
"amenity/conference_centre",
|
||||
"amenity/crematorium",
|
||||
"amenity/disused",
|
||||
"amenity/driver_training",
|
||||
"amenity/driving_school",
|
||||
"amenity/escooter_rental",
|
||||
"amenity/ferry_terminal",
|
||||
|
|
@ -82,14 +96,21 @@ DROP_CATEGORIES = {
|
|||
"amenity/kick-scooter_rental",
|
||||
"amenity/money_transfer",
|
||||
"amenity/post_depot",
|
||||
"amenity/prison",
|
||||
"amenity/public_building",
|
||||
"amenity/recycling",
|
||||
"amenity/scout_hut",
|
||||
"amenity/social_facility",
|
||||
"amenity/studio",
|
||||
"amenity/student_accommodation",
|
||||
"amenity/taxi",
|
||||
"amenity/telephone_exchange",
|
||||
"amenity/training",
|
||||
"amenity/vehicle_inspection",
|
||||
"amenity/waiting_room",
|
||||
"amenity/yes",
|
||||
"shop/disused",
|
||||
"shop/no",
|
||||
# Buildings (except church & university which are mapped)
|
||||
"building/air_shaft",
|
||||
"building/apartments",
|
||||
|
|
@ -148,12 +169,14 @@ DROP_CATEGORIES = {
|
|||
"emergency/yes",
|
||||
"tourism/apartment",
|
||||
"tourism/apartments",
|
||||
"tourism/alpine_hut",
|
||||
"tourism/camp_pitch",
|
||||
"tourism/caravan_site",
|
||||
"tourism/information",
|
||||
"tourism/picnic_site",
|
||||
"tourism/viewpoint",
|
||||
"tourism/village_sign",
|
||||
"tourism/wilderness_hut",
|
||||
"tourism/yes",
|
||||
# Public transport (from NaPTAN instead)
|
||||
"public_transport/entrance",
|
||||
|
|
@ -191,6 +214,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🍺",
|
||||
[
|
||||
"amenity/pub",
|
||||
"amenity/beer_garden",
|
||||
"amenity/biergarten",
|
||||
"amenity/social_club",
|
||||
"amenity/club",
|
||||
"leisure/social_club",
|
||||
|
|
@ -293,7 +318,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"leisure/miniature_golf",
|
||||
"leisure/horse_riding",
|
||||
"leisure/fishing",
|
||||
"leisure/ice_rink",
|
||||
"leisure/paddling_pool",
|
||||
"leisure/practice_pitch",
|
||||
"leisure/shooting_ground",
|
||||
"leisure/stadium",
|
||||
"leisure/swimming_pool",
|
||||
"leisure/swimming_area",
|
||||
"leisure/water_park",
|
||||
"leisure/bathing_place",
|
||||
],
|
||||
|
|
@ -307,9 +338,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"leisure/amusement_arcade",
|
||||
"leisure/adult_gaming_centre",
|
||||
"leisure/escape_game",
|
||||
"leisure/maze",
|
||||
"leisure/trampoline_park",
|
||||
"leisure/sauna",
|
||||
"leisure/tanning_salon",
|
||||
"shop/amusements",
|
||||
"tourism/theme_park",
|
||||
"amenity/bicycle_rental",
|
||||
"amenity/boat_rental",
|
||||
|
|
@ -345,6 +378,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/bakery",
|
||||
"shop/pastry",
|
||||
"craft/bakery",
|
||||
"craft/confectionery",
|
||||
],
|
||||
),
|
||||
|
|
@ -364,6 +398,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/greengrocer",
|
||||
"shop/farm",
|
||||
"shop/market",
|
||||
"amenity/marketplace",
|
||||
],
|
||||
),
|
||||
|
|
@ -424,6 +459,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/appliance",
|
||||
"shop/electrical",
|
||||
"shop/hifi",
|
||||
"shop/vacuum_cleaner",
|
||||
"shop/video_games",
|
||||
"shop/games",
|
||||
],
|
||||
|
|
@ -444,7 +480,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/doityourself",
|
||||
"shop/hardware",
|
||||
"shop/builders_merchant",
|
||||
"shop/paint",
|
||||
"shop/plumbing",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -462,11 +500,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/curtain",
|
||||
"shop/flooring",
|
||||
"shop/fireplace",
|
||||
"shop/garden_furniture",
|
||||
"shop/groundskeeping",
|
||||
"shop/household",
|
||||
"shop/household_linen",
|
||||
"shop/houseware",
|
||||
"shop/homeware",
|
||||
"shop/interior_decoration",
|
||||
"shop/lighting",
|
||||
"shop/kitchenware",
|
||||
"shop/window_blind",
|
||||
],
|
||||
),
|
||||
|
|
@ -493,8 +535,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🏕️",
|
||||
[
|
||||
"shop/sports",
|
||||
"shop/angling",
|
||||
"shop/outdoor",
|
||||
"shop/bicycle",
|
||||
"shop/equestrian",
|
||||
"shop/surf",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -532,9 +577,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/music",
|
||||
"shop/musical_instrument",
|
||||
"shop/antiques",
|
||||
"shop/anime",
|
||||
"shop/baby_goods",
|
||||
"shop/fabric",
|
||||
"shop/haberdashery",
|
||||
"shop/hobby",
|
||||
"shop/wool",
|
||||
"shop/pottery",
|
||||
],
|
||||
|
|
@ -549,9 +596,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/bookmaker",
|
||||
"shop/building_materials",
|
||||
"shop/camera",
|
||||
"shop/cannabis",
|
||||
"shop/car",
|
||||
"shop/caravan",
|
||||
"shop/catalogue",
|
||||
"shop/auction",
|
||||
"shop/auction_house",
|
||||
"shop/chandler",
|
||||
"shop/collector",
|
||||
"shop/copyshop",
|
||||
"shop/country_store",
|
||||
|
|
@ -560,6 +611,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/erotic",
|
||||
"shop/esoteric",
|
||||
"shop/fan",
|
||||
"shop/fireworks",
|
||||
"shop/fishing",
|
||||
"shop/frame",
|
||||
"shop/fuel",
|
||||
|
|
@ -582,6 +634,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/scuba_diving",
|
||||
"shop/security",
|
||||
"shop/sewing",
|
||||
"shop/ship_chandler",
|
||||
"shop/signs",
|
||||
"shop/storage_rental",
|
||||
"shop/swimming_pool",
|
||||
"shop/telecommunication",
|
||||
|
|
@ -590,7 +644,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/tool_hire",
|
||||
"shop/trade",
|
||||
"shop/trophy",
|
||||
"shop/truck",
|
||||
"shop/vacant",
|
||||
"shop/van",
|
||||
"shop/video",
|
||||
"shop/water_sports",
|
||||
"shop/weapons",
|
||||
|
|
@ -611,6 +667,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/cosmetics",
|
||||
"shop/massage",
|
||||
"shop/perfumery",
|
||||
"leisure/spa",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -757,6 +814,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"amenity/hospital",
|
||||
"amenity/clinic",
|
||||
"amenity/health_centre",
|
||||
"healthcare/blood_donation",
|
||||
"healthcare/hospital",
|
||||
"healthcare/centre",
|
||||
"healthcare/clinic",
|
||||
|
|
@ -804,6 +863,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"amenity/care_home",
|
||||
"amenity/nursing_home",
|
||||
"amenity/retirement_home",
|
||||
"healthcare/hospice",
|
||||
"healthcare/nursing_home",
|
||||
"office/home_care",
|
||||
],
|
||||
),
|
||||
|
|
@ -848,6 +910,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"⛪",
|
||||
[
|
||||
"amenity/place_of_worship",
|
||||
"amenity/monastery",
|
||||
"building/church",
|
||||
],
|
||||
),
|
||||
|
|
@ -873,6 +936,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"📸",
|
||||
[
|
||||
"tourism/attraction",
|
||||
"tourism/aquarium",
|
||||
"amenity/fountain",
|
||||
"amenity/courthouse",
|
||||
"tourism/chalet",
|
||||
|
|
@ -892,6 +956,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"building/university",
|
||||
"amenity/kindergarten",
|
||||
"amenity/childcare",
|
||||
"office/tutoring",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -904,6 +969,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"tourism/guest_house",
|
||||
"tourism/motel",
|
||||
"tourism/camp_site",
|
||||
"leisure/resort",
|
||||
"tourism/holiday_park",
|
||||
"tourism/self_catering",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -928,14 +996,19 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"craft/window_construction",
|
||||
"craft/agricultural_engines",
|
||||
"craft/atelier",
|
||||
"craft/beekeeper",
|
||||
"craft/blacksmith",
|
||||
"craft/bookbinder",
|
||||
"craft/boatbuilder",
|
||||
"craft/caterer",
|
||||
"craft/carpet_layer",
|
||||
"craft/clockmaker",
|
||||
"craft/handicraft",
|
||||
"craft/jeweller",
|
||||
"craft/metal_construction",
|
||||
"craft/photographer",
|
||||
"craft/photographic_laboratory",
|
||||
"craft/plasterer",
|
||||
"craft/pottery",
|
||||
"craft/printer",
|
||||
"craft/sawmill",
|
||||
|
|
@ -946,22 +1019,28 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"craft/upholsterer",
|
||||
"craft/watchmaker",
|
||||
"craft/yes",
|
||||
"amenity/workshop",
|
||||
"shop/glaziery",
|
||||
"shop/windows",
|
||||
# Professional offices & estate agents
|
||||
"shop/estate_agent",
|
||||
"office/accountant",
|
||||
"office/architect",
|
||||
"office/auctioneer",
|
||||
"office/builder",
|
||||
"office/construction",
|
||||
"office/construction_company",
|
||||
"office/engineer",
|
||||
"office/estate_agent",
|
||||
"office/financial",
|
||||
"office/financial_advisor",
|
||||
"office/financial_services",
|
||||
"office/insurance",
|
||||
"office/lawyer",
|
||||
"office/mortgage",
|
||||
"office/property_management",
|
||||
"office/solicitor",
|
||||
"office/solicitors",
|
||||
"office/surveyor",
|
||||
"office/tax_advisor",
|
||||
],
|
||||
|
|
@ -972,6 +1051,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🏢",
|
||||
[
|
||||
"amenity/coworking_space",
|
||||
"amenity/research_institute",
|
||||
"office/administrative",
|
||||
"office/advertising_agency",
|
||||
"office/association",
|
||||
"office/charity",
|
||||
|
|
@ -997,12 +1078,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"office/notary",
|
||||
"office/political_party",
|
||||
"office/politician",
|
||||
"office/publisher",
|
||||
"office/quango",
|
||||
"office/recruitment",
|
||||
"office/religion",
|
||||
"office/research",
|
||||
"office/security",
|
||||
"office/taxi",
|
||||
"office/telecommunication",
|
||||
"office/transport",
|
||||
"office/union",
|
||||
"office/university",
|
||||
"office/vacant",
|
||||
|
|
@ -1032,7 +1116,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"Community Centre",
|
||||
"🤝",
|
||||
[
|
||||
"amenity/church_hall",
|
||||
"amenity/clubhouse",
|
||||
"amenity/community_centre",
|
||||
"amenity/community_hall",
|
||||
"amenity/scout_hall",
|
||||
"amenity/social_centre",
|
||||
"amenity/townhall",
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue