More
This commit is contained in:
parent
1f68ca0512
commit
3599803589
43 changed files with 3578 additions and 262 deletions
|
|
@@ -36,9 +36,10 @@ def main():
|
|||
df = pl.read_parquet(args.input)
|
||||
print(f" {len(df):,} rows, {len(df.columns)} columns")
|
||||
|
||||
# Drop existing estimated price column if re-running
|
||||
if "Estimated current price" in df.columns:
|
||||
df = df.drop("Estimated current price")
|
||||
# Drop existing estimated columns if re-running
|
||||
for col in ["Estimated current price", "Est. price per sqm"]:
|
||||
if col in df.columns:
|
||||
df = df.drop(col)
|
||||
|
||||
# Derive helper columns for the join
|
||||
has_price = (
|
||||
|
|
@@ -126,6 +127,14 @@ def main():
|
|||
.alias("Estimated current price"),
|
||||
)
|
||||
|
||||
# Derive estimated price per sqm where both estimated price and floor area exist
|
||||
df = df.with_columns(
|
||||
(pl.col("Estimated current price") / pl.col("Total floor area (sqm)"))
|
||||
.round(0)
|
||||
.cast(pl.Int32)
|
||||
.alias("Est. price per sqm"),
|
||||
)
|
||||
|
||||
n_adjusted = df.filter(
|
||||
has_price & pl.col("_log_index_sale").is_not_null()
|
||||
).height
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue