Outer join epc
This commit is contained in:
parent
6268dbda4d
commit
609dd5278c
1 changed files with 4 additions and 8 deletions
|
|
@ -4,8 +4,6 @@ from pathlib import Path
|
|||
from ..utils import fuzzy_join_on_postcode
|
||||
|
||||
|
||||
MIN_FLOOR_AREA_M2 = 10
|
||||
|
||||
pl.Config.set_tbl_cols(-1)
|
||||
|
||||
|
||||
|
|
@ -121,8 +119,6 @@ def main():
|
|||
print(f"Matched: {matched.height} ({100 * matched.height / total:.1f}%)")
|
||||
print(f"Unmatched: {total - matched.height}")
|
||||
|
||||
matched = matched.filter(pl.col("TOTAL_FLOOR_AREA") >= MIN_FLOOR_AREA_M2)
|
||||
|
||||
# For new-builds (old_new == "Y"), use the first transaction date year as
|
||||
# the exact construction date; otherwise fall back to the EPC age band.
|
||||
epc_band_year = (
|
||||
|
|
@ -137,7 +133,7 @@ def main():
|
|||
)
|
||||
is_new_build = pl.col("old_new") == "Y"
|
||||
|
||||
matched = matched.with_columns(
|
||||
joined = joined.with_columns(
|
||||
pl.when(is_new_build & transfer_year.is_not_null())
|
||||
.then(transfer_year)
|
||||
.otherwise(epc_band_year)
|
||||
|
|
@ -150,10 +146,10 @@ def main():
|
|||
.alias("is_construction_date_approximate"),
|
||||
).drop("old_new", "first_transfer_date")
|
||||
|
||||
matched = matched.rename({col: col.lower() for col in joined.columns})
|
||||
joined = joined.rename({col: col.lower() for col in joined.columns})
|
||||
|
||||
print(matched.head())
|
||||
matched.write_parquet(args.output)
|
||||
print(joined.head())
|
||||
joined.write_parquet(args.output)
|
||||
print(f"Wrote {args.output}")
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue