This commit is contained in:
Andras Schmelczer 2026-02-10 22:21:15 +00:00
parent 1f68ca0512
commit 3599803589
43 changed files with 3578 additions and 262 deletions

View file

@ -36,9 +36,10 @@ def main():
df = pl.read_parquet(args.input)
print(f" {len(df):,} rows, {len(df.columns)} columns")
# Drop existing estimated price column if re-running
if "Estimated current price" in df.columns:
df = df.drop("Estimated current price")
# Drop existing estimated columns if re-running
for col in ["Estimated current price", "Est. price per sqm"]:
if col in df.columns:
df = df.drop(col)
# Derive helper columns for the join
has_price = (
@ -126,6 +127,14 @@ def main():
.alias("Estimated current price"),
)
# Derive estimated price per sqm where both estimated price and floor area exist
df = df.with_columns(
(pl.col("Estimated current price") / pl.col("Total floor area (sqm)"))
.round(0)
.cast(pl.Int32)
.alias("Est. price per sqm"),
)
n_adjusted = df.filter(
has_price & pl.col("_log_index_sale").is_not_null()
).height