This commit is contained in:
Andras Schmelczer 2026-05-06 23:13:58 +01:00
parent 94f9c0d594
commit 5c3b87f2d5
69 changed files with 1334 additions and 213 deletions

View file

@ -128,9 +128,7 @@ def main():
# Social tenure fork: flag properties that were ever social housing
social_tenure = (
epc_base.filter(
pl.col("TENURE").str.to_lowercase().str.contains("social")
)
epc_base.filter(pl.col("TENURE").str.to_lowercase().str.contains("social"))
.select("epc_address", "POSTCODE")
.unique()
.with_columns(pl.lit("Yes").alias("was_council_house"))
@ -139,16 +137,20 @@ def main():
print(f"Former council houses (EPC social tenure): {social_tenure.height}")
# Left-join events and social tenure back onto dedup EPC
epc = epc.join(
events.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
).join(
social_tenure.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
).with_columns(
pl.col("was_council_house").fill_null("No"),
epc = (
epc.join(
events.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
)
.join(
social_tenure.lazy(),
on=["epc_address", "POSTCODE"],
how="left",
)
.with_columns(
pl.col("was_council_house").fill_null("No"),
)
)
print("EPC dataset")

View file

@ -1092,6 +1092,7 @@ GROCERY_FASCIA_ICON_NAMES: dict[str, str] = {
"Asda Living": "Asda Living",
"Asda PFS": "Asda PFS",
"Cooltrader": "Heron Foods",
"Co-op Food": "Co-op",
"Cook": "COOK",
"Eurospar": "Spar",
"Eurospar PFS": "Spar",
@ -1144,9 +1145,7 @@ def transform_grocery_retail_points(
required = {"id", "retailer", "fascia", "store_name", "long_wgs", "lat_wgs"}
missing = required - set(grocery_df.columns)
if missing:
raise ValueError(
f"GEOLYTIX retail points missing columns: {sorted(missing)}"
)
raise ValueError(f"GEOLYTIX retail points missing columns: {sorted(missing)}")
df = (
grocery_df.select(