changes
This commit is contained in:
parent
524580eb25
commit
ffe080adef
82 changed files with 2652 additions and 2956 deletions
|
|
@ -1,7 +1,6 @@
|
|||
"""Shared utilities for price estimation modules."""
|
||||
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
|
@ -9,7 +8,6 @@ import polars as pl
|
|||
# Hard-coded year constant (not derived from the system clock).
CURRENT_YEAR = 2026

# Snapshot of today's date, taken once when the module is imported.
_today = date.today()
CURRENT_MONTH = _today.month
# Calendar year plus the fraction of the year elapsed at the start of the
# current month (January -> +0, December -> +11/12).
CURRENT_FRAC_YEAR = _today.year + (CURRENT_MONTH - 1) / 12

# Upper bound on log(index_ratio) so thin sectors cannot yield wild estimates
MAX_LOG_ADJUSTMENT = 3.0  # exp(3.0) ~ 20x max price change
|
||||
|
|
@ -181,53 +179,3 @@ def join_type_stratified_index(
|
|||
).drop(_typed, _all)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def compute_seasonal_factors(
    input_path: Path, max_sale_year: int | None = None
) -> np.ndarray:
    """Compute 12 multiplicative monthly price factors from price-per-sqm.

    Detrends by dividing each (year, month) median £/sqm by the mean of that
    year's monthly medians, then averages the resulting ratios per calendar
    month across years.

    Args:
        input_path: Parquet file providing the columns "Last known price",
            "Total floor area (sqm)" and "Date of last transaction".
        max_sale_year: If given, only sales from years strictly before this
            value are used.

    Returns:
        Array of exactly 12 factors (index 0 = January), normalized so the
        mean is 1.0. A calendar month with no usable sales receives the
        neutral factor 1.0 before normalization, so every factor always sits
        at the position of its own month.
    """
    query = (
        pl.scan_parquet(input_path)
        .select("Last known price", "Total floor area (sqm)", "Date of last transaction")
        .filter(
            pl.col("Last known price").is_not_null(),
            pl.col("Last known price") > 0,
            pl.col("Total floor area (sqm)").is_not_null(),
            pl.col("Total floor area (sqm)") > 0,
            pl.col("Date of last transaction").is_not_null(),
        )
        .with_columns(
            (
                pl.col("Last known price").cast(pl.Float64)
                / pl.col("Total floor area (sqm)").cast(pl.Float64)
            ).alias("psm"),
            pl.col("Date of last transaction").dt.month().alias("month"),
            pl.col("Date of last transaction").dt.year().alias("year"),
        )
    )
    if max_sale_year is not None:
        query = query.filter(pl.col("year") < max_sale_year)

    monthly = (
        query.group_by("year", "month")
        .agg(pl.col("psm").median().alias("median_psm"))
        .with_columns(
            # Per-year level used to strip the long-run price trend.
            pl.col("median_psm").mean().over("year").alias("year_mean"),
        )
        .with_columns(
            (pl.col("median_psm") / pl.col("year_mean")).alias("ratio"),
        )
        .group_by("month")
        .agg(pl.col("ratio").mean().alias("factor"))
        .sort("month")
        .collect()
    )

    # Scatter the observed factors into a fixed 12-slot array keyed by calendar
    # month. Relying on the row order alone would return fewer than 12 values
    # (and shift later months to earlier indices) whenever a month has no
    # sales, and would yield NaN from an empty mean when nothing matches.
    factors = np.ones(12, dtype=np.float64)
    months = monthly["month"].to_numpy().astype(np.intp)
    values = monthly["factor"].to_numpy().astype(np.float64)
    usable = np.isfinite(values)
    factors[months[usable] - 1] = values[usable]

    return factors / factors.mean()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue