Add council house

This commit is contained in:
Andras Schmelczer 2026-03-24 22:52:56 +00:00
parent 300209b192
commit 582bc856d8
2 changed files with 36 additions and 19 deletions

View file

@ -37,6 +37,7 @@ def main():
"NUMBER_HABITABLE_ROOMS",
"FLOOR_HEIGHT",
"CONSTRUCTION_AGE_BAND",
"TENURE",
)
.filter(pl.col("epc_address").is_not_null())
.with_columns(
@ -52,6 +53,7 @@ def main():
epc_base.sort("INSPECTION_DATE", descending=True)
.group_by("epc_address", "POSTCODE")
.first()
.drop("TENURE")
)
# Events fork: detect renovation events between consecutive certificates
@ -124,11 +126,29 @@ def main():
print(f"Renovation events: {events.height} properties with events")
print(event_counts)
# Left-join events back onto dedup EPC
# Social tenure fork: flag properties that were ever social housing.
# Works from epc_base rather than the latest-certificate-per-address frame:
# that frame has TENURE dropped, and filtering before any dedup means a single
# matching certificate is enough to flag the property.
social_tenure = (
epc_base.filter(
# Case-insensitive match on "social". A null TENURE yields a null
# predicate, and filter() does not keep null-predicate rows.
pl.col("TENURE").str.to_lowercase().str.contains("social")
)
# Reduce to one row per (epc_address, POSTCODE) so the later left join
# cannot multiply rows of the main EPC frame.
.select("epc_address", "POSTCODE")
.unique()
.with_columns(pl.lit("Yes").alias("was_council_house"))
# Materialise here because the row count is printed straight away.
.collect()
)
# NOTE(review): the label says "council houses" but the filter accepts any
# TENURE containing "social" — confirm against the TENURE vocabulary whether
# that also covers non-council social landlords.
print(f"Former council houses (EPC social tenure): {social_tenure.height}")
# Left-join events and social tenure back onto dedup EPC.
# Both joins use the (epc_address, POSTCODE) key with how="left", so rows of
# epc without a match are kept and get nulls in the joined columns.
epc = epc.join(
# events is used as an eager frame above (events.height), so it is
# re-wrapped with .lazy() to join against epc.
events.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
).join(
# social_tenure was collected above; same .lazy() re-wrap.
social_tenure.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
).with_columns(
# Unmatched properties come out of the left join with a null flag; make
# that an explicit "No" so the column only ever holds "Yes" or "No".
pl.col("was_council_house").fill_null("No"),
)
print("EPC dataset")