LGTM
This commit is contained in:
parent
a8165249a4
commit
a4103b0896
64 changed files with 5376 additions and 3832 deletions
94
pipeline/transform/price_estimation/test_knn.py
Normal file
94
pipeline/transform/price_estimation/test_knn.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
from datetime import date
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.transform.price_estimation.estimate import guarded_blend_estimates
|
||||
from pipeline.transform.price_estimation.knn import build_knn_pool, knn_median_psm
|
||||
from pipeline.transform.price_estimation.utils import TYPE_GROUPS, type_group_expr
|
||||
|
||||
|
||||
def _flat_index() -> pl.DataFrame:
|
||||
return pl.DataFrame(
|
||||
{
|
||||
"sector": ["AA1 1", "AA1 1"],
|
||||
"type_group": ["Detached", "All"],
|
||||
"year": [2026, 2026],
|
||||
"log_index": [0.0, 0.0],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def test_knn_excludes_same_sale_and_uses_stable_comparables():
|
||||
sale_date = date(2026, 1, 1)
|
||||
rows = [
|
||||
{
|
||||
"Postcode": "AA1 1AA",
|
||||
"Property type": "Detached",
|
||||
"lat": 51.5000,
|
||||
"lon": -0.1000,
|
||||
"Total floor area (sqm)": 80.0,
|
||||
"Last known price": 900_000.0,
|
||||
"Date of last transaction": sale_date,
|
||||
}
|
||||
]
|
||||
rows.extend(
|
||||
{
|
||||
"Postcode": "AA1 1AA",
|
||||
"Property type": "Detached",
|
||||
"lat": 51.5001 + i * 0.00001,
|
||||
"lon": -0.1001,
|
||||
"Total floor area (sqm)": 20.0,
|
||||
"Last known price": 900_000.0,
|
||||
"Date of last transaction": sale_date,
|
||||
}
|
||||
for i in range(5)
|
||||
)
|
||||
rows.extend(
|
||||
{
|
||||
"Postcode": f"AA1 1B{i}",
|
||||
"Property type": "Detached",
|
||||
"lat": 51.5010 + i * 0.00001,
|
||||
"lon": -0.1010,
|
||||
"Total floor area (sqm)": 80.0,
|
||||
"Last known price": 200_000.0,
|
||||
"Date of last transaction": sale_date,
|
||||
}
|
||||
for i in range(5)
|
||||
)
|
||||
df = pl.DataFrame(rows)
|
||||
|
||||
trees = build_knn_pool(df.lazy(), _flat_index(), 2026.0)
|
||||
psm = knn_median_psm(
|
||||
trees,
|
||||
lat=np.array([51.5000]),
|
||||
lon=np.array([-0.1000]),
|
||||
type_groups=np.array(["Detached"]),
|
||||
postcodes=np.array(["AA1 1AA"]),
|
||||
last_prices=np.array([900_000.0]),
|
||||
last_sale_dates=np.array(
|
||||
[sale_date.toordinal() - date(1970, 1, 1).toordinal()]
|
||||
),
|
||||
)
|
||||
|
||||
assert psm[0] == 2_500.0
|
||||
|
||||
|
||||
def test_guarded_blend_routes_unstable_knn_to_index_and_caps_uplift():
|
||||
blended = guarded_blend_estimates(
|
||||
index_est=np.array([120_000.0, 1_000_000.0]),
|
||||
knn_est=np.array([5_000_000.0, 1_000_000.0]),
|
||||
last_prices=np.array([100_000.0, 100_000.0]),
|
||||
)
|
||||
|
||||
assert blended[0] == 120_000.0
|
||||
assert blended[1] == 600_000.0
|
||||
|
||||
|
||||
def test_bungalow_is_not_a_dead_price_index_type_group():
|
||||
df = pl.DataFrame({"Property type": ["Bungalow", "Other"]}).with_columns(
|
||||
type_group_expr()
|
||||
)
|
||||
|
||||
assert "Bungalow" not in TYPE_GROUPS
|
||||
assert df["type_group"].to_list() == [None, None]
|
||||
Loading…
Add table
Add a link
Reference in a new issue