117 lines
5 KiB
Python
117 lines
5 KiB
Python
"""Regression tests for parent-base lifting before hierarchical blending.
|
|
|
|
solve_robust_index anchors every repeat-sales cell to log-index 0 at its OWN
|
|
earliest year, so a cell with a shorter history sits on a later origin than its
|
|
(wider) parent. shrink_dicts / blend_dicts combine dicts key-by-key, so a child
|
|
must first be lifted onto its parent's base at the child's first year, or the
|
|
blend averages level-incompatible numbers (fix5-index-base-year).
|
|
|
|
Note: re-anchoring each cell to the *global* base year is a no-op on real data
|
|
(a cell anchored to 0 at its own earliest year already reads 0 there, and the
|
|
global base is never later), which is why the fix lifts onto the *parent* at the
|
|
child's own start year instead.
|
|
"""
|
|
|
|
from pipeline.transform.price_estimation.shrinkage import (
|
|
hierarchical_shrinkage,
|
|
lift_onto_parent,
|
|
shrink_dicts,
|
|
)
|
|
from pipeline.transform.price_estimation.utils import SHRINKAGE_K
|
|
|
|
|
|
def test_lift_rebases_late_starting_child_onto_parent():
    """Lifting a late-starting child moves it to the parent's level at its first year.

    The sector only has values from 2016 (where it reads 0.0) while the parent
    runs from 1996.  After the lift the sector must read the parent's 2016
    value at 2016, the later point must carry the same offset, and the
    sector's own 2016->2024 change must be untouched.
    """
    tol = 1e-12
    wide = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
    # The sector's own series begins in 2016 and is anchored there at 0.
    narrow = {2016: 0.0, 2024: 0.20}

    rebased = lift_onto_parent(narrow, wide)

    # At the child's start year the lifted value equals the parent's (1.20).
    start_gap = rebased[2016] - wide[2016]
    assert abs(start_gap) < tol

    # The later point is offset by the same amount: 1.20 + 0.20 = 1.40.
    end_gap = rebased[2024] - (wide[2016] + 0.20)
    assert abs(end_gap) < tol

    # A constant shift leaves the child's own year-to-year move unchanged.
    move_after = rebased[2024] - rebased[2016]
    move_before = narrow[2024] - narrow[2016]
    assert abs(move_after - move_before) < tol
|
|
|
|
|
|
def test_lift_is_noop_when_child_starts_at_parent_base():
    """Lifting changes nothing when the child begins where the parent reads 0.

    Both series start in 1996 and the parent's value there is 0.0, so the
    result must compare equal to the child that was passed in.
    """
    wide = {1996: 0.0, 2008: 0.80, 2016: 1.20}
    narrow = {1996: 0.0, 2008: 0.75, 2016: 1.10}

    result = lift_onto_parent(narrow, wide)

    assert result == narrow
|
|
|
|
|
|
def test_lift_handles_empty_inputs():
    """An empty child yields an empty dict; an empty parent leaves the child as given."""
    single_point = {2000: 0.0}

    # Empty child, non-empty parent.
    from_empty_child = lift_onto_parent({}, single_point)
    assert from_empty_child == {}

    # Non-empty child, empty parent.
    from_empty_parent = lift_onto_parent({2000: 0.0}, {})
    assert from_empty_parent == {2000: 0.0}
|
|
|
|
|
|
def test_lift_fixes_estimate_spanning_child_start_but_not_within_range():
    """Lifting alters only comparisons that cross the child's first year.

    Scenario: a sale in 2008, before the sector's own series starts in 2016,
    valued in 2024.  Blending the un-lifted sector with the parent combines a
    2016-anchored level with 1996-anchored levels, so the 2008->2024 move comes
    out far too small.  A comparison that stays inside the sector's own range
    (2016->2024) is the same either way, because the lift adds one constant to
    every sector point and that constant cancels in a difference.
    """
    tol = 1e-12
    parent = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
    sector = {2016: 0.0, 2024: 0.20}  # sector's own series starts in 2016
    n = 30
    w = n / (n + SHRINKAGE_K)

    # Blend without lifting first (the pre-fix behaviour), then with the lift.
    raw = shrink_dicts(sector, parent, n)
    lifted_sector = lift_onto_parent(sector, parent)
    fixed = shrink_dicts(lifted_sector, parent, n)

    # Inside the sector's own range the two blends move identically.
    fixed_inner = fixed[2024] - fixed[2016]
    raw_inner = raw[2024] - raw[2016]
    assert abs(fixed_inner - raw_inner) < tol

    # The sector has no 2008 entry, so both blends read parent[2008] there.
    assert abs(raw[2008] - parent[2008]) < tol
    assert abs(fixed[2008] - parent[2008]) < tol

    raw_move = raw[2024] - raw[2008]
    fixed_move = fixed[2024] - fixed[2008]

    # Hand-computed 2024 levels: the sector contributes 0.20 un-lifted and
    # 1.40 lifted, each blended with the parent's 1.50 using weight w.
    expected_raw_2024 = w * 0.20 + (1 - w) * 1.50
    expected_fixed_2024 = w * 1.40 + (1 - w) * 1.50
    assert abs(raw_move - (expected_raw_2024 - 0.80)) < tol
    assert abs(fixed_move - (expected_fixed_2024 - 0.80)) < tol

    # The lift restores exactly the parent growth up to the sector's start
    # year that the raw blend dropped, scaled by the blend weight w.
    restored = fixed_move - raw_move
    assert abs(restored - w * parent[2016]) < tol

    # The lifted move sits near the parent's own 2008->2024 move (0.70); the
    # raw move is far below it.
    # NOTE(review): the second check needs w > 1/3, i.e. SHRINKAGE_K < 60 for
    # n = 30 -- confirm against the constant's current value.
    assert abs(fixed_move - 0.70) < 0.2
    assert raw_move < 0.4 * fixed_move
|
|
|
|
|
|
def test_hierarchical_shrinkage_lift_fn_only_changes_spanning_comparisons():
    """Integration check: supplying the lift function changes only spanning moves.

    hierarchical_shrinkage is called twice with identical arguments, once
    without and once with lift_onto_parent as the extra trailing argument.
    The sector's 2016->2024 move must agree between the two results, while
    the 2008->2024 move (which crosses the sector's 2016 start) must be
    larger by more than 0.1 when the lift function is supplied.
    """
    tol = 1e-12
    code = "AB1 1"
    top = {1996: 0.0, 2008: 0.80, 2016: 1.20, 2024: 1.50}
    sector = {code: {2016: 0.0, 2024: 0.20}}
    sector_n = {code: 300}

    # The four empty dicts mean there are no area/district indices of their
    # own, so the sector shrinks straight toward `top`.
    shared_args = [
        sector,
        sector_n,
        {},
        {},
        {},
        {},
        top,
        [code],
        {code: "AB1"},
        {"AB1": "AB"},
        shrink_dicts,
    ]

    without_lift = hierarchical_shrinkage(*shared_args)[code]
    with_lift = hierarchical_shrinkage(*shared_args, lift_onto_parent)[code]

    # Inside the sector's own range the constant shift cancels out.
    inner_with = with_lift[2024] - with_lift[2016]
    inner_without = without_lift[2024] - without_lift[2016]
    assert abs(inner_with - inner_without) < tol

    # Across the sector's start year the lift raises the 2008->2024 move.
    span_with = with_lift[2024] - with_lift[2008]
    span_without = without_lift[2024] - without_lift[2008]
    assert span_with > span_without + 0.1
|