Rerun prepare script

2026-04-06 11:13:52 +01:00 · 2026-04-06 11:13:52 +01:00 · 8614acdfae
commit 8614acdfae
parent 349a6c1d53
24 changed files with 1132 additions and 226 deletions
--- a/pipeline/transform/merge.py
+++ b/pipeline/transform/merge.py
@@ -65,7 +65,6 @@ _AREA_COLUMNS = [
    # Politics
    "Winning party",
    "Voter turnout (%)",
-    "Majority (%)",
    "% Labour",
    "% Conservative",
    "% Liberal Democrat",
@@ -116,15 +115,19 @@ def _build(

    arcgis = (
        pl.scan_parquet(arcgis_path)
-        .filter(pl.col("ctry") == "E92000001")  # England only
+        .filter(pl.col("ctry25cd") == "E92000001")  # England only
        .filter(pl.col("doterm").is_null())  # Active postcodes only
+        # NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
+        # Alias them back to the short canonical names used across the
+        # pipeline so downstream joins don't need to know about NSPL's
+        # versioning scheme.
        .select(
            pl.col("pcds").alias("postcode"),
            "lat",
            pl.col("long").alias("lon"),
-            "lsoa21",
-            "oa21",
-            "pcon",
+            pl.col("lsoa21cd").alias("lsoa21"),
+            pl.col("oa21cd").alias("oa21"),
+            pl.col("pcon24cd").alias("pcon"),
        )
    )
    wide = wide.join(arcgis, on="postcode", how="left")
@@ -354,13 +357,12 @@ def _build(
                "minor_crime_avg_yr": "Minor crime (avg/yr)",
                "serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
                "minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
-                "median_monthly_rent": "Estimated monthly rent",
+                "mean_monthly_rent": "Estimated monthly rent",
                "floor_height": "Interior height (m)",
                "was_council_house": "Former council house",
                "median_age": "Median age",
                "winning_party": "Winning party",
                "turnout_pct": "Voter turnout (%)",
-                "majority_pct": "Majority (%)",
            }
        )
    )
--- a/pipeline/transform/school_proximity.py
+++ b/pipeline/transform/school_proximity.py
@@ -28,10 +28,14 @@ def main():
    )
    args = parser.parse_args()

-    # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools
+    # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools.
+    # Post-2025 reform the single "Overall effectiveness" grade was retired;
+    # the legacy 1–4 scale is now carried forward under "Latest OEIF overall
+    # effectiveness" (OEIF = the previous Ofsted Education Inspection
+    # Framework). The new report-card columns use text judgements instead.
    ofsted = pl.read_parquet(args.ofsted).filter(
        pl.col("Ofsted phase").is_in(["Primary", "Secondary"])
-        & pl.col("Overall effectiveness").is_in(["1", "2"])
+        & pl.col("Latest OEIF overall effectiveness").is_in(["1", "2"])
    )

    print(f"Good+ schools: {len(ofsted):,}")