Improve data
This commit is contained in:
parent
b4d66a28c1
commit
85da1941aa
31 changed files with 901 additions and 319 deletions
|
|
@@ -25,6 +25,7 @@ from pipeline.transform.price_estimation.shrinkage import (
|
|||
)
|
||||
from pipeline.transform.price_estimation.utils import (
|
||||
CURRENT_YEAR,
|
||||
LATEST_COMPLETE_YEAR,
|
||||
TEMPORAL_SMOOTHNESS_LAMBDA,
|
||||
TYPE_GROUPS,
|
||||
build_hedonic_features,
|
||||
|
|
@@ -395,14 +396,22 @@ def build_index(
|
|||
The index is still forward-filled to CURRENT_YEAR.
|
||||
postcodes_path: if provided, lat/lon are read from this file instead of input_path.
|
||||
"""
|
||||
pairs = extract_pairs(input_path, max_year2=max_pair_year)
|
||||
# Solve the index only on COMPLETE calendar years: exclude the partial
|
||||
# current year, whose thin repeat-sale set yields wild betas. The index is
|
||||
# still forward-filled/trend-extrapolated to CURRENT_YEAR below, so 2026
|
||||
# follows the established trend rather than a partial-year spike. Backtest
|
||||
# passes a stricter max_pair_year, which is honoured.
|
||||
estimation_cap = (
|
||||
max_pair_year if max_pair_year is not None else LATEST_COMPLETE_YEAR + 1
|
||||
)
|
||||
pairs = extract_pairs(input_path, max_year2=estimation_cap)
|
||||
centroids = extract_centroids(postcodes_path or input_path)
|
||||
|
||||
min_year = int(pairs["year1"].min())
|
||||
max_year = CURRENT_YEAR
|
||||
|
||||
hedonic_idx = compute_hedonic_index(
|
||||
input_path, min_year, max_year, max_sale_year=max_pair_year
|
||||
input_path, min_year, max_year, max_sale_year=estimation_cap
|
||||
)
|
||||
|
||||
# Precompute hierarchy
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue