Improve data pipeline

This commit is contained in:
Andras Schmelczer 2026-06-01 20:10:03 +01:00
parent e8345cbdc1
commit f99bd4e5c9
36 changed files with 966 additions and 129 deletions

View file

@ -22,6 +22,13 @@ FLAT_TYPES = ["Flats/Maisonettes"]
# Labels of the property-type groups used by this module.
# NOTE(review): presumably these are the values produced by type_group_expr()
# ("Property type -> type_group") -- confirm against its body.
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats"]
# NOTE(review): looks like a shrinkage strength (pseudo-count style "k") used
# when pulling small-sample estimates toward a pooled value; the usage is not
# visible in this chunk -- confirm before relying on this description.
SHRINKAGE_K = 50
# Temporal regularization for the repeat-sales index: a second-difference
# (curvature) penalty lambda * sum((beta_t - 2*beta_{t-1} + beta_{t-2})^2) added
# to the IRLS solve. A mild penalty damps single-year index spikes (which would
# otherwise distort the estimate of any property whose last sale landed on a
# noisy year) without flattening genuine multi-year trends.
# NOTE(review): presumably this constant is the `lambda` in the formula above
# and beta_t is the index coefficient for period t -- confirm at the solve site.
TEMPORAL_SMOOTHNESS_LAMBDA = 0.05
def type_group_expr():
"""Polars expression: Property type -> type_group."""