"""Regression tests for common-base-year re-anchoring before blending. Each repeat-sales index dict is anchored to log-index 0 at its OWN earliest year. shrink_dicts / blend_dicts combine dicts key-by-key, so dicts anchored to different base years must be re-anchored to a single common base first, or the blend averages level-incompatible numbers (fix5-index-base-year). """ from pipeline.transform.price_estimation.shrinkage import ( blend_dicts, reanchor_dict, reanchor_dicts, shrink_dicts, ) def test_reanchor_is_pure_constant_shift_preserving_differences(): """Re-anchoring only shifts the origin; year-to-year moves are unchanged.""" # Anchored at its own earliest year 2008. idx = {2008: 0.0, 2009: 0.10, 2010: 0.25, 2011: 0.40} reanchored = reanchor_dict(idx, 1996) # 1996 is before this dict's history -> back-fill earliest value (0.0), # so the shift is 0 and the dict is unchanged. assert reanchored[2008] == 0.0 # Same shape, different exact-hit base year: anchoring at 2010 subtracts 0.25. reanchored_2010 = reanchor_dict(idx, 2010) assert reanchored_2010[2010] == 0.0 # All within-dict differences are preserved under the constant shift. years = sorted(idx) for a, b in zip(years, years[1:]): assert abs((reanchored_2010[b] - reanchored_2010[a]) - (idx[b] - idx[a])) < 1e-12 def test_blend_different_base_years_needs_reanchoring(): """Blending two dicts on different bases is biased unless re-anchored first. Both cells observe the common base year 1996 but were anchored to DIFFERENT origins (sectorA at 1996, sectorB at 2008, as solve_robust_index would do for cells whose pair history starts at different years). They describe the SAME true trajectory measured from 1996, so a 50/50 blend should reproduce that common level. Pre-fix, blend_dicts mixes sectorB's 2008-relative numbers with sectorA's 1996-relative numbers, level-shifting the smoothed result. """ base_year = 1996 # True log-levels relative to 1996 (identical trajectory for both cells). truth = {1996: 0.0, 2008: 0.80, 2012: 1.00} # sectorA: anchored at 1996 (its earliest year) -> equals truth. sector_a = dict(truth) # sectorB: same trajectory but anchored at 2008 (subtract truth[2008] from # every year), exactly how solve_robust_index would express a cell whose # earliest year happened to be picked as 2008. shift_b = truth[2008] sector_b = {y: v - shift_b for y, v in truth.items()} # --- Pre-fix behaviour: blend the raw dicts directly. --- raw_blend = blend_dicts(sector_a, [sector_b], 0.5, [0.5]) # Every year is pulled by half of shift_b (0.4) away from the truth. assert abs(raw_blend[2012] - truth[2012]) > 0.3 assert abs(raw_blend[1996] - truth[1996]) > 0.3 # --- Post-fix behaviour: re-anchor to the common base, THEN blend. --- reanchored = reanchor_dicts({"A": sector_a, "B": sector_b}, base_year) fixed_blend = blend_dicts(reanchored["A"], [reanchored["B"]], 0.5, [0.5]) # Both cells now read 0 at 1996 and the true level at every shared year. for y in truth: assert abs(fixed_blend[y] - truth[y]) < 1e-9 def test_shrink_dicts_after_reanchoring_is_consistent(): """Shrinking a cell toward its parent must use a common origin.""" base_year = 2000 # Parent (national) anchored at 2000. parent = {2000: 0.0, 2010: 0.50, 2020: 1.20} # Sector tracking the parent exactly but anchored at 2010 (subtract 0.50 from # every year), as solve_robust_index would express a cell whose earliest year # is later. It still observes the 2000 base year (value -0.50). sector = {2000: -0.50, 2010: 0.0, 2020: 0.70} n = 0 # no own data weight -> result should equal parent after anchoring reanchored_sector = reanchor_dict(sector, base_year) # Exact hit on 2000 subtracts -0.50, putting the sector back on the parent's # origin: 0.0 at 2000, 0.50 at 2010, 1.20 at 2020. shrunk = shrink_dicts(reanchored_sector, parent, n) assert abs(shrunk[2000] - 0.0) < 1e-9 assert abs(shrunk[2010] - 0.50) < 1e-9 assert abs(shrunk[2020] - 1.20) < 1e-9 def test_reanchor_exact_hit_shifts_all_years(): """When the base year is present, subtract its value from every year.""" idx = {1996: 0.0, 2005: 0.30, 2015: 0.90} reanchored = reanchor_dict(idx, 2005) assert reanchored[2005] == 0.0 assert abs(reanchored[1996] - (-0.30)) < 1e-12 assert abs(reanchored[2015] - 0.60) < 1e-12