Add actual construction age
This commit is contained in:
parent
646d8bdc46
commit
806213ce3c
1 changed files with 32 additions and 0 deletions
|
|
@ -4,6 +4,8 @@ from pathlib import Path
|
||||||
from ..utils import fuzzy_join_on_postcode
|
from ..utils import fuzzy_join_on_postcode
|
||||||
|
|
||||||
|
|
||||||
|
MIN_FLOOR_AREA_M2 = 10
|
||||||
|
|
||||||
pl.Config.set_tbl_cols(-1)
|
pl.Config.set_tbl_cols(-1)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -69,6 +71,7 @@ def main():
|
||||||
"locality",
|
"locality",
|
||||||
"town_city",
|
"town_city",
|
||||||
pl.col("duration").replace(duration_map),
|
pl.col("duration").replace(duration_map),
|
||||||
|
"old_new",
|
||||||
)
|
)
|
||||||
.filter(pl.col("pp_property_type") != "Other")
|
.filter(pl.col("pp_property_type") != "Other")
|
||||||
.with_columns(
|
.with_columns(
|
||||||
|
|
@ -89,6 +92,8 @@ def main():
|
||||||
pl.col("duration").last(),
|
pl.col("duration").last(),
|
||||||
pl.col("price").last().alias("latest_price"),
|
pl.col("price").last().alias("latest_price"),
|
||||||
pl.col("date_of_transfer").last(),
|
pl.col("date_of_transfer").last(),
|
||||||
|
pl.col("date_of_transfer").first().alias("first_transfer_date"),
|
||||||
|
pl.col("old_new").first(),
|
||||||
)
|
)
|
||||||
).filter(pl.col("pp_address").is_not_null())
|
).filter(pl.col("pp_address").is_not_null())
|
||||||
|
|
||||||
|
|
@ -116,6 +121,33 @@ def main():
|
||||||
print(f"Matched: {matched.height} ({100 * matched.height / total:.1f}%)")
|
print(f"Matched: {matched.height} ({100 * matched.height / total:.1f}%)")
|
||||||
print(f"Unmatched: {total - matched.height}")
|
print(f"Unmatched: {total - matched.height}")
|
||||||
|
|
||||||
|
matched = matched.filter(pl.col("TOTAL_FLOOR_AREA") >= MIN_FLOOR_AREA_M2)
|
||||||
|
|
||||||
|
# For new-builds (old_new == "Y"), use the year of first_transfer_date as
|
||||||
|
# the exact construction year; otherwise fall back to the first four-digit year in the EPC age band.
|
||||||
|
epc_band_year = (
|
||||||
|
pl.col("CONSTRUCTION_AGE_BAND")
|
||||||
|
.str.replace("England and Wales: ", "")
|
||||||
|
.str.replace(" onwards", "")
|
||||||
|
.str.extract(r"(\d{4})", 1)
|
||||||
|
.cast(pl.UInt16, strict=False)
|
||||||
|
)
|
||||||
|
transfer_year = pl.col("first_transfer_date").dt.year().cast(pl.UInt16, strict=False)
|
||||||
|
is_new_build = pl.col("old_new") == "Y"
|
||||||
|
|
||||||
|
matched = matched.with_columns(
|
||||||
|
pl.when(is_new_build & transfer_year.is_not_null())
|
||||||
|
.then(transfer_year)
|
||||||
|
.otherwise(epc_band_year)
|
||||||
|
.alias("CONSTRUCTION_AGE_BAND"),
|
||||||
|
pl.when(is_new_build & transfer_year.is_not_null())
|
||||||
|
.then(pl.lit(0, dtype=pl.UInt8))
|
||||||
|
.when(epc_band_year.is_not_null())
|
||||||
|
.then(pl.lit(1, dtype=pl.UInt8))
|
||||||
|
.otherwise(pl.lit(None, dtype=pl.UInt8))
|
||||||
|
.alias("is_construction_date_approximate"),
|
||||||
|
).drop("old_new", "first_transfer_date")
|
||||||
|
|
||||||
matched = matched.rename({col: col.lower() for col in joined.columns})
|
matched = matched.rename({col: col.lower() for col in joined.columns})
|
||||||
|
|
||||||
print(matched.head())
|
print(matched.head())
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue