Rerun prepare script

This commit is contained in:
Andras Schmelczer 2026-04-06 11:13:52 +01:00
parent 349a6c1d53
commit 8614acdfae
24 changed files with 1132 additions and 226 deletions

View file

@ -65,7 +65,6 @@ _AREA_COLUMNS = [
# Politics
"Winning party",
"Voter turnout (%)",
"Majority (%)",
"% Labour",
"% Conservative",
"% Liberal Democrat",
@ -116,15 +115,19 @@ def _build(
arcgis = (
pl.scan_parquet(arcgis_path)
.filter(pl.col("ctry") == "E92000001") # England only
.filter(pl.col("ctry25cd") == "E92000001") # England only
.filter(pl.col("doterm").is_null()) # Active postcodes only
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
# Alias them back to the short canonical names used across the
# pipeline so downstream joins don't need to know about NSPL's
# versioning scheme.
.select(
pl.col("pcds").alias("postcode"),
"lat",
pl.col("long").alias("lon"),
"lsoa21",
"oa21",
"pcon",
pl.col("lsoa21cd").alias("lsoa21"),
pl.col("oa21cd").alias("oa21"),
pl.col("pcon24cd").alias("pcon"),
)
)
wide = wide.join(arcgis, on="postcode", how="left")
@ -354,13 +357,12 @@ def _build(
"minor_crime_avg_yr": "Minor crime (avg/yr)",
"serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
"minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
"median_monthly_rent": "Estimated monthly rent",
"mean_monthly_rent": "Estimated monthly rent",
"floor_height": "Interior height (m)",
"was_council_house": "Former council house",
"median_age": "Median age",
"winning_party": "Winning party",
"turnout_pct": "Voter turnout (%)",
"majority_pct": "Majority (%)",
}
)
)