Fmt
This commit is contained in:
parent
479ef92236
commit
c38d654ac7
44 changed files with 2526 additions and 701 deletions
|
|
@ -112,7 +112,9 @@ def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
|
|||
|
||||
|
||||
def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
|
||||
valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
|
||||
valid = (
|
||||
np.isfinite(predicted) & np.isfinite(actual) & (actual > 0) & (predicted > 0)
|
||||
)
|
||||
actual = actual[valid]
|
||||
predicted = predicted[valid]
|
||||
|
||||
|
|
@ -176,7 +178,10 @@ def main():
|
|||
"--input", type=Path, required=True, help="Path to properties.parquet"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--postcodes", type=Path, required=True, help="Path to postcode.parquet (for lat/lon)"
|
||||
"--postcodes",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Path to postcode.parquet (for lat/lon)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output backtest_results.parquet"
|
||||
|
|
@ -185,7 +190,9 @@ def main():
|
|||
|
||||
# Build index from pre-test data only (temporal holdout)
|
||||
print(f"Building price index (pairs with year2 < {TEST_YEAR_MIN})...")
|
||||
index = build_index(args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes)
|
||||
index = build_index(
|
||||
args.input, max_pair_year=TEST_YEAR_MIN, postcodes_path=args.postcodes
|
||||
)
|
||||
print(
|
||||
f"\nHoldout index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
|
||||
f"{index['type_group'].n_unique()} type groups"
|
||||
|
|
@ -233,7 +240,9 @@ def main():
|
|||
knn_est = knn_psm * fa * temporal_adj
|
||||
|
||||
n_knn = int((np.isfinite(knn_est) & (knn_est > 0)).sum())
|
||||
print(f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)")
|
||||
print(
|
||||
f" kNN estimates: {n_knn:,} of {len(test):,} ({n_knn / len(test) * 100:.1f}%)"
|
||||
)
|
||||
|
||||
# Blend: (1-w)*index + w*kNN where both available
|
||||
index_est = test["predicted"].to_numpy().astype(np.float64)
|
||||
|
|
|
|||
|
|
@ -107,9 +107,7 @@ def main():
|
|||
pl.when(has_price)
|
||||
.then(
|
||||
pl.col("Last known price").cast(pl.Float64)
|
||||
* (
|
||||
pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp")
|
||||
)
|
||||
* (pl.col("_log_index_current_interp") - pl.col("_log_index_sale_interp"))
|
||||
.clip(-MAX_LOG_ADJUSTMENT, MAX_LOG_ADJUSTMENT)
|
||||
.exp()
|
||||
)
|
||||
|
|
|
|||
|
|
@ -105,9 +105,7 @@ def extract_pairs(input_path: Path, max_year2: int | None = None) -> pl.DataFram
|
|||
.alias("log_ratio"),
|
||||
(
|
||||
1.0
|
||||
/ (pl.col("frac_year2") - pl.col("frac_year1"))
|
||||
.cast(pl.Float64)
|
||||
.sqrt()
|
||||
/ (pl.col("frac_year2") - pl.col("frac_year1")).cast(pl.Float64).sqrt()
|
||||
).alias("weight"),
|
||||
)
|
||||
.filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
|
||||
|
|
@ -453,8 +451,12 @@ def main():
|
|||
description="Build improved repeat-sales price index"
|
||||
)
|
||||
parser.add_argument("--input", type=Path, required=True)
|
||||
parser.add_argument("--postcodes", type=Path, required=True,
|
||||
help="Path to postcode.parquet (for lat/lon centroids)")
|
||||
parser.add_argument(
|
||||
"--postcodes",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Path to postcode.parquet (for lat/lon centroids)",
|
||||
)
|
||||
parser.add_argument("--output", type=Path, required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -43,48 +43,39 @@ def build_knn_pool(
|
|||
"""
|
||||
print("Building kNN pool...")
|
||||
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
||||
query = (
|
||||
lf
|
||||
.select(
|
||||
"Postcode",
|
||||
"Property type",
|
||||
"lat",
|
||||
"lon",
|
||||
"Total floor area (sqm)",
|
||||
"Last known price",
|
||||
"Date of last transaction",
|
||||
)
|
||||
.filter(
|
||||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
pl.col("Date of last transaction").is_not_null(),
|
||||
)
|
||||
query = lf.select(
|
||||
"Postcode",
|
||||
"Property type",
|
||||
"lat",
|
||||
"lon",
|
||||
"Total floor area (sqm)",
|
||||
"Last known price",
|
||||
"Date of last transaction",
|
||||
).filter(
|
||||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
pl.col("Date of last transaction").is_not_null(),
|
||||
)
|
||||
if max_sale_year is not None:
|
||||
query = query.filter(
|
||||
pl.col("Date of last transaction").dt.year() < max_sale_year
|
||||
)
|
||||
|
||||
pool = (
|
||||
query.with_columns(
|
||||
sector_expr(),
|
||||
type_group_expr(),
|
||||
(
|
||||
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
||||
+ (
|
||||
pl.col("Date of last transaction").dt.month().cast(pl.Float64)
|
||||
- 1.0
|
||||
)
|
||||
/ 12.0
|
||||
).alias("_sale_fy"),
|
||||
pl.lit(ref_frac_year).alias("_ref_fy"),
|
||||
).collect()
|
||||
)
|
||||
pool = query.with_columns(
|
||||
sector_expr(),
|
||||
type_group_expr(),
|
||||
(
|
||||
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
||||
+ (pl.col("Date of last transaction").dt.month().cast(pl.Float64) - 1.0)
|
||||
/ 12.0
|
||||
).alias("_sale_fy"),
|
||||
pl.lit(ref_frac_year).alias("_ref_fy"),
|
||||
).collect()
|
||||
pool = pool.filter(pl.col("type_group").is_not_null())
|
||||
print(f" {len(pool):,} pool properties with lat/lon, floor area, price")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue