This commit is contained in:
Andras Schmelczer 2026-03-15 21:22:28 +00:00
parent 479ef92236
commit c38d654ac7
44 changed files with 2526 additions and 701 deletions

View file

@ -112,7 +112,9 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
valid = (
np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
)
actual = actual[valid]
predicted = predicted[valid]
@ -176,7 +178,10 @@ def main():
"--input", type=Path, required=True, help="Path to properties.parquet"
)
parser.add_argument(
"--postcodes", type=Path, required=True, help="Path to postcode.parquet (for lat/lon)"
"--postcodes",
type=Path,
required=True,
help="Path to postcode.parquet (for lat/lon)",
)
parser.add_argument(
"--output", type=Path, required=True, help="Output backtest_results.parquet"
@ -185,7 +190,9 @@ def main():
# Build index from pre-test data only (temporal holdout)
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
index = build_index(args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes)
index = build_index(
args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes
)
print(
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
f"{index['type_group'].n_unique()} type groups"
@ -233,7 +240,9 @@ def main():
knn_est = knn_psm * fa * temporal_adj
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
print(f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)")
print(
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
)
# Blend: (1-w)*index + w*kNN where both available
index_est = test["predicted"].to_numpy().astype(np.float64)