This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@@ -11,9 +11,9 @@ from pathlib import Path
import numpy as np
import polars as pl
from pipeline.transform.price_estimation.estimate import guarded_blend_estimates
from pipeline.transform.price_estimation.index import build_index
from pipeline.transform.price_estimation.knn import (
KNN_BLEND_WEIGHT,
build_knn_pool,
knn_median_psm,
)
@@ -115,7 +115,10 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
.exp()
)
.fill_null(pl.col("input_price").cast(pl.Float64))
# Keep null when the index can't be interpolated, matching production
# (estimate.py ships null there). compute_metrics filters to finite
# positive predictions, so these rows correctly drop from the Index n
# rather than silently degrading to the Naive prediction.
.alias("predicted"),
)
return test
@@ -265,13 +268,12 @@ def main():
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
)
# Blend: (1-w)*index + w*kNN where both available
# Blend with the exact shipped estimator (stability gate + last-price cap +
# null-when-no-index) so the "Blended" stage reflects production accuracy.
# input_price is the backtest equivalent of production's "Last known price".
index_est = test["predicted"].to_numpy().astype(np.float64)
knn_valid = np.isfinite(knn_est) & (knn_est > 0)
blended = np.where(
knn_valid & np.isfinite(index_est),
(1 - KNN_BLEND_WEIGHT) * index_est + KNN_BLEND_WEIGHT * knn_est,
np.where(np.isfinite(index_est), index_est, knn_est),
blended = guarded_blend_estimates(
index_est, knn_est, test["input_price"].cast(pl.Float64).to_numpy()
)
actual = test["actual_price"].to_numpy().astype(np.float64)