Improve data pipeline
This commit is contained in:
parent
e8345cbdc1
commit
f99bd4e5c9
36 changed files with 966 additions and 129 deletions
|
|
@ -67,6 +67,16 @@ def extract_test_set(input_path: Path) -> pl.DataFrame:
|
|||
.struct.field("price")
|
||||
.alias("input_price"),
|
||||
)
|
||||
.with_columns(
|
||||
# Date of the input (second-to-last) sale, used by the kNN leakage
|
||||
# filter to exclude the target property's own prior sale from its
|
||||
# comparables. Built from year+month (day defaults to the 1st).
|
||||
pl.date(
|
||||
pl.col("input_year").cast(pl.Int32),
|
||||
pl.col("input_month").cast(pl.Int32),
|
||||
1,
|
||||
).alias("input_date"),
|
||||
)
|
||||
.with_columns(
|
||||
(
|
||||
pl.col("actual_year").cast(pl.Float64)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue