Outer join epc
This commit is contained in:
parent
6268dbda4d
commit
609dd5278c
1 changed file with 4 additions and 8 deletions
|
|
@@ -4,8 +4,6 @@ from pathlib import Path
|
||||||
from ..utils import fuzzy_join_on_postcode
|
from ..utils import fuzzy_join_on_postcode
|
||||||
|
|
||||||
|
|
||||||
MIN_FLOOR_AREA_M2 = 10
|
|
||||||
|
|
||||||
pl.Config.set_tbl_cols(-1)
|
pl.Config.set_tbl_cols(-1)
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -121,8 +119,6 @@ def main():
|
||||||
print(f"Matched: {matched.height} ({100 * matched.height / total:.1f}%)")
|
print(f"Matched: {matched.height} ({100 * matched.height / total:.1f}%)")
|
||||||
print(f"Unmatched: {total - matched.height}")
|
print(f"Unmatched: {total - matched.height}")
|
||||||
|
|
||||||
matched = matched.filter(pl.col("TOTAL_FLOOR_AREA") >= MIN_FLOOR_AREA_M2)
|
|
||||||
|
|
||||||
# For new-builds (old_new == "Y"), use the first transaction date year as
|
# For new-builds (old_new == "Y"), use the first transaction date year as
|
||||||
# the exact construction date; otherwise fall back to the EPC age band.
|
# the exact construction date; otherwise fall back to the EPC age band.
|
||||||
epc_band_year = (
|
epc_band_year = (
|
||||||
|
|
@@ -137,7 +133,7 @@ def main():
|
||||||
)
|
)
|
||||||
is_new_build = pl.col("old_new") == "Y"
|
is_new_build = pl.col("old_new") == "Y"
|
||||||
|
|
||||||
matched = matched.with_columns(
|
joined = joined.with_columns(
|
||||||
pl.when(is_new_build & transfer_year.is_not_null())
|
pl.when(is_new_build & transfer_year.is_not_null())
|
||||||
.then(transfer_year)
|
.then(transfer_year)
|
||||||
.otherwise(epc_band_year)
|
.otherwise(epc_band_year)
|
||||||
|
|
@@ -150,10 +146,10 @@ def main():
|
||||||
.alias("is_construction_date_approximate"),
|
.alias("is_construction_date_approximate"),
|
||||||
).drop("old_new", "first_transfer_date")
|
).drop("old_new", "first_transfer_date")
|
||||||
|
|
||||||
matched = matched.rename({col: col.lower() for col in joined.columns})
|
joined = joined.rename({col: col.lower() for col in joined.columns})
|
||||||
|
|
||||||
print(matched.head())
|
print(joined.head())
|
||||||
matched.write_parquet(args.output)
|
joined.write_parquet(args.output)
|
||||||
print(f"Wrote {args.output}")
|
print(f"Wrote {args.output}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue