Fmt
This commit is contained in:
parent
479ef92236
commit
c38d654ac7
44 changed files with 2526 additions and 701 deletions
|
|
@ -43,48 +43,39 @@ def build_knn_pool(
|
|||
"""
|
||||
print("Building kNN pool...")
|
||||
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
||||
query = (
|
||||
lf
|
||||
.select(
|
||||
"Postcode",
|
||||
"Property type",
|
||||
"lat",
|
||||
"lon",
|
||||
"Total floor area (sqm)",
|
||||
"Last known price",
|
||||
"Date of last transaction",
|
||||
)
|
||||
.filter(
|
||||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
pl.col("Date of last transaction").is_not_null(),
|
||||
)
|
||||
query = lf.select(
|
||||
"Postcode",
|
||||
"Property type",
|
||||
"lat",
|
||||
"lon",
|
||||
"Total floor area (sqm)",
|
||||
"Last known price",
|
||||
"Date of last transaction",
|
||||
).filter(
|
||||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
pl.col("Date of last transaction").is_not_null(),
|
||||
)
|
||||
if max_sale_year is not None:
|
||||
query = query.filter(
|
||||
pl.col("Date of last transaction").dt.year() < max_sale_year
|
||||
)
|
||||
|
||||
pool = (
|
||||
query.with_columns(
|
||||
sector_expr(),
|
||||
type_group_expr(),
|
||||
(
|
||||
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
||||
+ (
|
||||
pl.col("Date of last transaction").dt.month().cast(pl.Float64)
|
||||
- 1.0
|
||||
)
|
||||
/ 12.0
|
||||
).alias("_sale_fy"),
|
||||
pl.lit(ref_frac_year).alias("_ref_fy"),
|
||||
).collect()
|
||||
)
|
||||
pool = query.with_columns(
|
||||
sector_expr(),
|
||||
type_group_expr(),
|
||||
(
|
||||
pl.col("Date of last transaction").dt.year().cast(pl.Float64)
|
||||
+ (pl.col("Date of last transaction").dt.month().cast(pl.Float64) - 1.0)
|
||||
/ 12.0
|
||||
).alias("_sale_fy"),
|
||||
pl.lit(ref_frac_year).alias("_ref_fy"),
|
||||
).collect()
|
||||
pool = pool.filter(pl.col("type_group").is_not_null())
|
||||
print(f" {len(pool):,} pool properties with lat/lon, floor area, price")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue