idgf
This commit is contained in:
parent
fbfebc651c
commit
aab85fe32e
33 changed files with 2016 additions and 283 deletions
|
|
@ -1,99 +1,117 @@
|
|||
"""Regression tests for common-base-year re-anchoring before blending.
|
||||
"""Regression tests for parent-base lifting before hierarchical blending.
|
||||
|
||||
Each repeat-sales index dict is anchored to log-index 0 at its OWN earliest
|
||||
year. shrink_dicts / blend_dicts combine dicts key-by-key, so dicts anchored to
|
||||
different base years must be re-anchored to a single common base first, or the
|
||||
solve_robust_index anchors every repeat-sales cell to log-index 0 at its OWN
|
||||
earliest year, so a cell with a shorter history sits on a later origin than its
|
||||
(wider) parent. shrink_dicts / blend_dicts combine dicts key-by-key, so a child
|
||||
must first be lifted onto its parent's base at the child's first year, or the
|
||||
blend averages level-incompatible numbers (fix5-index-base-year).
|
||||
|
||||
Note: re-anchoring each cell to the *global* base year is a no-op on real data
|
||||
(a cell anchored to 0 at its own earliest year already reads 0 there, and the
|
||||
global base is never later), which is why the fix lifts onto the *parent* at the
|
||||
child's own start year instead.
|
||||
"""
|
||||
|
||||
from pipeline.transform.price_estimation.shrinkage import (
|
||||
blend_dicts,
|
||||
reanchor_dict,
|
||||
reanchor_dicts,
|
||||
hierarchical_shrinkage,
|
||||
lift_onto_parent,
|
||||
shrink_dicts,
|
||||
)
|
||||
from pipeline.transform.price_estimation.utils import SHRINKAGE_K
|
||||
|
||||
|
||||
def test_reanchor_is_pure_constant_shift_preserving_differences():
|
||||
"""Re-anchoring only shifts the origin; year-to-year moves are unchanged."""
|
||||
# Anchored at its own earliest year 2008.
|
||||
idx = {2008: 0.0, 2009: 0.10, 2010: 0.25, 2011: 0.40}
|
||||
def test_lift_rebases_late_starting_child_onto_parent():
|
||||
"""A child anchored at its own later start year is lifted to the parent's level there."""
|
||||
parent = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
|
||||
# Sector with its own repeat-sales data only from 2016, anchored at 2016 = 0.
|
||||
sector = {2016: 0.0, 2024: 0.20}
|
||||
|
||||
reanchored = reanchor_dict(idx, 1996)
|
||||
# 1996 is before this dict's history -> back-fill earliest value (0.0),
|
||||
# so the shift is 0 and the dict is unchanged.
|
||||
assert reanchored[2008] == 0.0
|
||||
lifted = lift_onto_parent(sector, parent)
|
||||
|
||||
# Same shape, different exact-hit base year: anchoring at 2010 subtracts 0.25.
|
||||
reanchored_2010 = reanchor_dict(idx, 2010)
|
||||
assert reanchored_2010[2010] == 0.0
|
||||
# All within-dict differences are preserved under the constant shift.
|
||||
years = sorted(idx)
|
||||
for a, b in zip(years, years[1:]):
|
||||
assert abs((reanchored_2010[b] - reanchored_2010[a]) - (idx[b] - idx[a])) < 1e-12
|
||||
# child[start] now equals the parent's accumulated level at that year.
|
||||
assert abs(lifted[2016] - parent[2016]) < 1e-12 # 1.20
|
||||
assert abs(lifted[2024] - (parent[2016] + 0.20)) < 1e-12 # 1.40
|
||||
# Pure constant shift: the child's own year-to-year move is preserved.
|
||||
assert abs((lifted[2024] - lifted[2016]) - (sector[2024] - sector[2016])) < 1e-12
|
||||
|
||||
|
||||
def test_blend_different_base_years_needs_reanchoring():
|
||||
"""Blending two dicts on different bases is biased unless re-anchored first.
|
||||
def test_lift_is_noop_when_child_starts_at_parent_base():
|
||||
"""A child whose earliest year is the parent's base (value 0) is unchanged."""
|
||||
parent = {1996: 0.0, 2008: 0.80, 2016: 1.20}
|
||||
child = {1996: 0.0, 2008: 0.75, 2016: 1.10}
|
||||
assert lift_onto_parent(child, parent) == child
|
||||
|
||||
Both cells observe the common base year 1996 but were anchored to DIFFERENT
|
||||
origins (sectorA at 1996, sectorB at 2008, as solve_robust_index would do for
|
||||
cells whose pair history starts at different years). They describe the SAME
|
||||
true trajectory measured from 1996, so a 50/50 blend should reproduce that
|
||||
common level. Pre-fix, blend_dicts mixes sectorB's 2008-relative numbers with
|
||||
sectorA's 1996-relative numbers, level-shifting the smoothed result.
|
||||
|
||||
def test_lift_handles_empty_inputs():
|
||||
assert lift_onto_parent({}, {2000: 0.0}) == {}
|
||||
assert lift_onto_parent({2000: 0.0}, {}) == {2000: 0.0}
|
||||
|
||||
|
||||
def test_lift_fixes_estimate_spanning_child_start_but_not_within_range():
|
||||
"""The lift corrects comparisons that span the cell's start year, and ONLY those.
|
||||
|
||||
A property sold in 2008 (before the sector's own data begins in 2016) and
|
||||
valued in 2024: pre-lift the shrunk index mixes a 2016-based sector level
|
||||
with 1996-based parent levels and badly understates the move. Comparisons
|
||||
wholly inside the sector's own range (2016->2024) are unchanged, because the
|
||||
lift is a pure constant shift that cancels in a within-cell difference.
|
||||
"""
|
||||
base_year = 1996
|
||||
parent = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
|
||||
sector = {2016: 0.0, 2024: 0.20} # own data starts 2016
|
||||
n = 30
|
||||
w = n / (n + SHRINKAGE_K)
|
||||
|
||||
# True log-levels relative to 1996 (identical trajectory for both cells).
|
||||
truth = {1996: 0.0, 2008: 0.80, 2012: 1.00}
|
||||
raw = shrink_dicts(sector, parent, n) # pre-fix: blend without lifting
|
||||
fixed = shrink_dicts(lift_onto_parent(sector, parent), parent, n)
|
||||
|
||||
# sectorA: anchored at 1996 (its earliest year) -> equals truth.
|
||||
sector_a = dict(truth)
|
||||
# sectorB: same trajectory but anchored at 2008 (subtract truth[2008] from
|
||||
# every year), exactly how solve_robust_index would express a cell whose
|
||||
# earliest year happened to be picked as 2008.
|
||||
shift_b = truth[2008]
|
||||
sector_b = {y: v - shift_b for y, v in truth.items()}
|
||||
# Within the sector's own range the lift changes nothing.
|
||||
assert abs((fixed[2024] - fixed[2016]) - (raw[2024] - raw[2016])) < 1e-12
|
||||
|
||||
# --- Pre-fix behaviour: blend the raw dicts directly. ---
|
||||
raw_blend = blend_dicts(sector_a, [sector_b], 0.5, [0.5])
|
||||
# Every year is pulled by half of shift_b (0.4) away from the truth.
|
||||
assert abs(raw_blend[2012] - truth[2012]) > 0.3
|
||||
assert abs(raw_blend[1996] - truth[1996]) > 0.3
|
||||
# 2008 is parent-only in both (sector absent), so both read parent[2008].
|
||||
assert abs(raw[2008] - parent[2008]) < 1e-12
|
||||
assert abs(fixed[2008] - parent[2008]) < 1e-12
|
||||
|
||||
# --- Post-fix behaviour: re-anchor to the common base, THEN blend. ---
|
||||
reanchored = reanchor_dicts({"A": sector_a, "B": sector_b}, base_year)
|
||||
fixed_blend = blend_dicts(reanchored["A"], [reanchored["B"]], 0.5, [0.5])
|
||||
# Both cells now read 0 at 1996 and the true level at every shared year.
|
||||
for y in truth:
|
||||
assert abs(fixed_blend[y] - truth[y]) < 1e-9
|
||||
raw_move = raw[2024] - raw[2008]
|
||||
fixed_move = fixed[2024] - fixed[2008]
|
||||
# Hand-computed: raw[2024] = w*0.20 + (1-w)*1.50; fixed[2024] = w*1.40 + (1-w)*1.50.
|
||||
assert abs(raw_move - ((w * 0.20 + (1 - w) * 1.50) - 0.80)) < 1e-12
|
||||
assert abs(fixed_move - ((w * 1.40 + (1 - w) * 1.50) - 0.80)) < 1e-12
|
||||
# The fix raises the spanning move by exactly the parent growth to the
|
||||
# sector's start year that the raw blend dropped (weighted by w).
|
||||
assert abs((fixed_move - raw_move) - w * parent[2016]) < 1e-12
|
||||
# Fixed move is close to the true area-level move (0.70); raw badly understates it.
|
||||
assert abs(fixed_move - 0.70) < 0.2
|
||||
assert raw_move < 0.4 * fixed_move
|
||||
|
||||
|
||||
def test_shrink_dicts_after_reanchoring_is_consistent():
|
||||
"""Shrinking a cell toward its parent must use a common origin."""
|
||||
base_year = 2000
|
||||
# Parent (national) anchored at 2000.
|
||||
parent = {2000: 0.0, 2010: 0.50, 2020: 1.20}
|
||||
# Sector tracking the parent exactly but anchored at 2010 (subtract 0.50 from
|
||||
# every year), as solve_robust_index would express a cell whose earliest year
|
||||
# is later. It still observes the 2000 base year (value -0.50).
|
||||
sector = {2000: -0.50, 2010: 0.0, 2020: 0.70}
|
||||
n = 0 # no own data weight -> result should equal parent after anchoring
|
||||
def test_hierarchical_shrinkage_lift_fn_only_changes_spanning_comparisons():
|
||||
"""Integration: passing lift_fn re-bases a late-starting sector via its parent chain."""
|
||||
top = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
|
||||
sector = {"AB1 1": {2016: 0.0, 2024: 0.20}}
|
||||
sector_n = {"AB1 1": 300}
|
||||
# No own area/district indices -> the sector shrinks straight toward `top`.
|
||||
base_args = (
|
||||
sector,
|
||||
sector_n,
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
top,
|
||||
["AB1 1"],
|
||||
{"AB1 1": "AB1"},
|
||||
{"AB1": "AB"},
|
||||
shrink_dicts,
|
||||
)
|
||||
|
||||
reanchored_sector = reanchor_dict(sector, base_year)
|
||||
# Exact hit on 2000 subtracts -0.50, putting the sector back on the parent's
|
||||
# origin: 0.0 at 2000, 0.50 at 2010, 1.20 at 2020.
|
||||
shrunk = shrink_dicts(reanchored_sector, parent, n)
|
||||
assert abs(shrunk[2000] - 0.0) < 1e-9
|
||||
assert abs(shrunk[2010] - 0.50) < 1e-9
|
||||
assert abs(shrunk[2020] - 1.20) < 1e-9
|
||||
without_lift = hierarchical_shrinkage(*base_args)["AB1 1"]
|
||||
with_lift = hierarchical_shrinkage(*base_args, lift_onto_parent)["AB1 1"]
|
||||
|
||||
|
||||
def test_reanchor_exact_hit_shifts_all_years():
|
||||
"""When the base year is present, subtract its value from every year."""
|
||||
idx = {1996: 0.0, 2005: 0.30, 2015: 0.90}
|
||||
reanchored = reanchor_dict(idx, 2005)
|
||||
assert reanchored[2005] == 0.0
|
||||
assert abs(reanchored[1996] - (-0.30)) < 1e-12
|
||||
assert abs(reanchored[2015] - 0.60) < 1e-12
|
||||
# Within the sector's own range: identical (pure constant shift cancels).
|
||||
assert abs(
|
||||
(with_lift[2024] - with_lift[2016]) - (without_lift[2024] - without_lift[2016])
|
||||
) < 1e-12
|
||||
# Spanning the sector's start year: the lift raises the 2008->2024 move.
|
||||
assert (with_lift[2024] - with_lift[2008]) > (
|
||||
without_lift[2024] - without_lift[2008]
|
||||
) + 0.1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue