Fable findings in data
This commit is contained in:
parent
b98bc6d611
commit
6a33b03fdf
20 changed files with 1502 additions and 274 deletions
|
|
@ -12,6 +12,18 @@ V = TypeVar("V")
|
|||
SPATIAL_NEIGHBORS = 5
|
||||
SPATIAL_BLEND_K = 30
|
||||
|
||||
# Hard band on a sector's per-year index move RELATIVE to its parent (the
|
||||
# national index), enforced by winsorize_steps after spatial smoothing. The
|
||||
# support-scaled temporal smoothness prior still under-penalises years
|
||||
# identified by only 1-2 repeat-sale pairs in thin early histories, leaving
|
||||
# artefacts like a x9.7 single-year jump (log +2.27, sector "M3 1"
|
||||
# 1998->1999). A sector may genuinely outpace the nation -- regeneration, new
|
||||
# transport links -- but those stories play out over multiple years, not as a
|
||||
# one-year x9.7 step. +/-0.40 log/yr (~x1.5 in a year) relative to the
|
||||
# national move keeps every plausible genuine sector-level divergence while
|
||||
# clamping thin-year data artefacts.
|
||||
MAX_STEP_DEVIATION_PER_YEAR = 0.40
|
||||
|
||||
|
||||
def _base_value(index: dict[int, float], base_year: int) -> float:
|
||||
"""Value of an index dict at `base_year`, with forward/back-fill for gaps.
|
||||
|
|
@ -75,6 +87,42 @@ def lift_onto_parent(
|
|||
return {y: v + offset for y, v in child.items()}
|
||||
|
||||
|
||||
def winsorize_steps(
    child: dict[int, float],
    parent: dict[int, float],
    max_dev_per_year: float,
) -> dict[int, float]:
    """Limit how far a child's year-on-year moves may stray from the parent's.

    The child's years are walked in ascending order. For every adjacent pair
    (earlier, later) the child's annualised rate of change is compared with
    the parent's annualised rate over the same span and forced into the band
    ``parent_rate +/- max_dev_per_year``. Parent levels are read through
    ``_base_value``, which forward/back-fills years the parent lacks.

    The output is re-accumulated from the child's earliest value, which has
    these consequences:

    - the earliest year keeps its original level;
    - steps already inside the band keep their size, while every later level
      is shifted by the total amount removed from clamped steps before it;
    - years separated by a gap are assessed on their per-year rate, so a
      large but gradual change across a gap is not treated as a single-year
      spike.

    Args:
        child: year -> index value for the series being clamped.
        parent: year -> index value for the reference series.
        max_dev_per_year: half-width of the allowed band around the parent's
            per-year rate.

    Returns:
        A new year -> value dict with clamped steps. If ``child`` has fewer
        than two years, or ``parent`` is empty, the ``child`` object itself
        is returned untouched.
    """
    # Degenerate inputs: no steps to clamp, or nothing to clamp them against.
    if len(child) < 2:
        return child
    if not parent:
        return child

    ordered_years = sorted(child)
    anchor_year = ordered_years[0]
    rebuilt = {anchor_year: child[anchor_year]}

    earlier = anchor_year
    for later in ordered_years[1:]:
        gap = later - earlier
        child_rate = (child[later] - child[earlier]) / gap
        parent_rate = (
            _base_value(parent, later) - _base_value(parent, earlier)
        ) / gap

        # Band edges for this span, centred on the parent's per-year rate.
        floor = parent_rate - max_dev_per_year
        ceiling = parent_rate + max_dev_per_year
        bounded_rate = min(max(child_rate, floor), ceiling)

        # Accumulate from the already-rebuilt previous level so that earlier
        # clamps carry forward into every later year.
        rebuilt[later] = rebuilt[earlier] + bounded_rate * gap
        earlier = later

    return rebuilt
|
||||
|
||||
|
||||
def shrink_dicts(raw: dict, parent: dict, n: int) -> dict:
|
||||
"""Shrink dict values toward parent using n/(n+k) weighting.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue