This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@ -19,6 +19,8 @@ from tqdm import tqdm
from pipeline.transform.price_estimation.shrinkage import (
blend_dicts,
hierarchical_shrinkage,
reanchor_dict,
reanchor_dicts,
shrink_dicts,
spatial_smooth,
)
@ -431,6 +433,17 @@ def build_index(
f" {len(area_idx)} areas, {len(district_idx)} districts, {len(sector_idx)} sectors"
)
# Re-anchor every repeat-sales dict to the global base year before any
# shrinkage/smoothing/blending. solve_robust_index anchors each cell to
# log-index 0 at its OWN earliest year, so cells with shorter histories
# are measured from a later origin; combining them key-by-key would
# otherwise average level-incompatible numbers. The hedonic fallback is
# already anchored at min_year, so we align everything to min_year.
national_idx = reanchor_dict(national_idx, min_year)
area_idx = reanchor_dicts(area_idx, min_year)
district_idx = reanchor_dicts(district_idx, min_year)
sector_idx = reanchor_dicts(sector_idx, min_year)
# Shrinkage: national -> hedonic first, then hierarchical
print(" Applying shrinkage...")
national_shrunk = shrink_dicts(national_idx, hedonic_idx, national_n)