Rerun prepare script

This commit is contained in:
Andras Schmelczer 2026-04-06 11:13:52 +01:00
parent 349a6c1d53
commit 8614acdfae
24 changed files with 1132 additions and 226 deletions

View file

@ -65,7 +65,6 @@ _AREA_COLUMNS = [
# Politics
"Winning party",
"Voter turnout (%)",
"Majority (%)",
"% Labour",
"% Conservative",
"% Liberal Democrat",
@ -116,15 +115,19 @@ def _build(
arcgis = (
pl.scan_parquet(arcgis_path)
.filter(pl.col("ctry") == "E92000001") # England only
.filter(pl.col("ctry25cd") == "E92000001") # England only
.filter(pl.col("doterm").is_null()) # Active postcodes only
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
# Alias them back to the short canonical names used across the
# pipeline so downstream joins don't need to know about NSPL's
# versioning scheme.
.select(
pl.col("pcds").alias("postcode"),
"lat",
pl.col("long").alias("lon"),
"lsoa21",
"oa21",
"pcon",
pl.col("lsoa21cd").alias("lsoa21"),
pl.col("oa21cd").alias("oa21"),
pl.col("pcon24cd").alias("pcon"),
)
)
wide = wide.join(arcgis, on="postcode", how="left")
@ -354,13 +357,12 @@ def _build(
"minor_crime_avg_yr": "Minor crime (avg/yr)",
"serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
"minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
"median_monthly_rent": "Estimated monthly rent",
"mean_monthly_rent": "Estimated monthly rent",
"floor_height": "Interior height (m)",
"was_council_house": "Former council house",
"median_age": "Median age",
"winning_party": "Winning party",
"turnout_pct": "Voter turnout (%)",
"majority_pct": "Majority (%)",
}
)
)

View file

@ -28,10 +28,14 @@ def main():
)
args = parser.parse_args()
# Load Ofsted data: filter to good+ (1, 2) primary/secondary schools
# Load Ofsted data: filter to good+ (1, 2) primary/secondary schools.
# Post-2025 reform the single "Overall effectiveness" grade was retired;
# the legacy 1-4 scale is now carried forward under "Latest OEIF overall
# effectiveness" (OEIF = the previous Ofsted Education Inspection
# Framework). The new report-card columns use text judgements instead.
ofsted = pl.read_parquet(args.ofsted).filter(
pl.col("Ofsted phase").is_in(["Primary", "Secondary"])
& pl.col("Overall effectiveness").is_in(["1", "2"])
& pl.col("Latest OEIF overall effectiveness").is_in(["1", "2"])
)
print(f"Good+ schools: {len(ofsted):,}")