This commit is contained in:
Andras Schmelczer 2026-02-18 21:22:15 +00:00
parent 524580eb25
commit ffe080adef
82 changed files with 2652 additions and 2956 deletions

View file

@ -1,7 +1,6 @@
"""Shared utilities for price estimation modules."""
from datetime import date
from pathlib import Path
import numpy as np
import polars as pl
@ -9,7 +8,6 @@ import polars as pl
# Hard-coded year constant; unlike the values below it is not read from the clock.
CURRENT_YEAR = 2026

# Snapshot of the system date, taken once when the module is imported.
_today = date.today()
CURRENT_MONTH = _today.month
# Year plus elapsed whole months as a fraction: January adds 0/12, December 11/12.
CURRENT_FRAC_YEAR = _today.year + (CURRENT_MONTH - 1) / 12

# Cap on log(index_ratio) to prevent wild estimates from thin sectors
MAX_LOG_ADJUSTMENT = 3.0  # exp(3.0) is roughly a 20x max price change
@ -181,53 +179,3 @@ def join_type_stratified_index(
).drop(_typed, _all)
return df
def compute_seasonal_factors(
    input_path: Path, max_sale_year: int | None = None
) -> np.ndarray:
    """Compute 12 multiplicative monthly price factors from price-per-sqm.

    Detrends by normalizing the median £/sqm of each (year, month) by the
    mean of that year's monthly medians, then averages the resulting ratios
    across years for each calendar month.

    Args:
        input_path: Parquet file containing the columns "Last known price",
            "Total floor area (sqm)" and "Date of last transaction".
        max_sale_year: If given, only transactions from years strictly
            before this one are used.

    Returns:
        Float64 array of exactly 12 factors (index 0 = January), normalized
        so that the mean is 1.0. Calendar months with no usable data get the
        neutral factor 1.0. If no rows survive the filters at all, every
        factor is 1.0.
    """
    query = (
        pl.scan_parquet(input_path)
        .select("Last known price", "Total floor area (sqm)", "Date of last transaction")
        .filter(
            pl.col("Last known price").is_not_null(),
            pl.col("Last known price") > 0,
            pl.col("Total floor area (sqm)").is_not_null(),
            pl.col("Total floor area (sqm)") > 0,
            pl.col("Date of last transaction").is_not_null(),
        )
        .with_columns(
            (
                pl.col("Last known price").cast(pl.Float64)
                / pl.col("Total floor area (sqm)").cast(pl.Float64)
            ).alias("psm"),
            pl.col("Date of last transaction").dt.month().alias("month"),
            pl.col("Date of last transaction").dt.year().alias("year"),
        )
    )

    if max_sale_year is not None:
        query = query.filter(pl.col("year") < max_sale_year)

    monthly = (
        query.group_by("year", "month")
        .agg(pl.col("psm").median().alias("median_psm"))
        .with_columns(
            # Per-year baseline: mean of that year's monthly medians.
            pl.col("median_psm").mean().over("year").alias("year_mean"),
        )
        .with_columns(
            (pl.col("median_psm") / pl.col("year_mean")).alias("ratio"),
        )
        .group_by("month")
        .agg(pl.col("ratio").mean().alias("factor"))
        .sort("month")
        .collect()
    )

    # Place each factor at its calendar slot (month 1 -> index 0) instead of
    # relying on row order: a month absent from the data would otherwise
    # shorten the array and shift every later month to the wrong index.
    factors = np.full(12, np.nan, dtype=np.float64)
    month_idx = monthly["month"].to_numpy().astype(np.int64) - 1
    factors[month_idx] = monthly["factor"].to_numpy().astype(np.float64)

    observed = np.isfinite(factors)
    if not observed.any():
        # No usable data: fall back to "no seasonal effect".
        return np.ones(12, dtype=np.float64)

    # Filling gaps with the observed mean makes them exactly 1.0 after the
    # normalization below, while keeping the overall mean at 1.0.
    factors[~observed] = factors[observed].mean()
    return factors / factors.mean()