"""Compute Ofsted-rated school proximity counts per postcode."""

import argparse
from pathlib import Path

import polars as pl

from pipeline.utils.poi_counts import count_pois_per_postcode

# Output group name -> the school ``category`` labels counted towards it.
# main() labels every school with exactly one category, so the "good" groups
# list both the good and the outstanding label (i.e. good-or-better), while
# the "outstanding" groups list the outstanding label only.
SCHOOL_GROUPS: dict[str, list[str]] = {
    "good_primary": ["good_primary", "outstanding_primary"],
    "good_secondary": ["good_secondary", "outstanding_secondary"],
    "outstanding_primary": ["outstanding_primary"],
    "outstanding_secondary": ["outstanding_secondary"],
}


def main() -> None:
    """Write per-postcode counts of good+ and outstanding schools to parquet.

    Command-line arguments (all required):
        --ofsted: Ofsted inspection parquet.
        --arcgis: ArcGIS postcode parquet.
        --output: Output parquet path; its parent directory is created if
            it does not exist yet.

    Steps: keep primary/secondary schools whose legacy OEIF grade is "1" or
    "2", label each with a category, attach coordinates via the school's
    postcode, run ``count_pois_per_postcode`` at 5 km and at 2 km, join the
    two results on ``postcode`` and write them out. Progress figures are
    printed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Count good+ and outstanding primary/secondary schools near each postcode"
    )
    parser.add_argument(
        "--ofsted", type=Path, required=True, help="Ofsted inspection parquet"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args()

    phase_col = "Ofsted phase"
    # Post-2025 reform the single "Overall effectiveness" grade was retired;
    # the legacy 1-4 scale is now carried forward under "Latest OEIF overall
    # effectiveness" (OEIF = the previous Ofsted Education Inspection
    # Framework). The new report-card columns use text judgements instead.
    rating_col = "Latest OEIF overall effectiveness"

    is_primary = pl.col(phase_col) == "Primary"
    is_outstanding = pl.col(rating_col) == "1"

    # Load Ofsted data: filter to good+ (grades "1" and "2") primary/secondary
    # schools. Grades are compared as strings, exactly as stored in the file.
    ofsted = pl.read_parquet(args.ofsted).filter(
        pl.col(phase_col).is_in(["Primary", "Secondary"])
        & pl.col(rating_col).is_in(["1", "2"])
    )

    print(f"Good+ schools: {len(ofsted):,}")
    print(f"Outstanding schools: {ofsted.filter(is_outstanding).height:,}")

    # Assign exactly one category per school from phase and rating. After the
    # filter above, "not Primary" can only mean Secondary and "not grade 1"
    # can only mean grade 2, so the fall-through branches are unambiguous.
    # SCHOOL_GROUPS then folds the outstanding labels into the good+ groups.
    category = (
        pl.when(is_primary & is_outstanding)
        .then(pl.lit("outstanding_primary"))
        .when(is_primary)
        .then(pl.lit("good_primary"))
        .when(is_outstanding)
        .then(pl.lit("outstanding_secondary"))
        .otherwise(pl.lit("good_secondary"))
        .alias("category")
    )
    ofsted = ofsted.with_columns(category).select(
        pl.col("Postcode").alias("postcode"),
        "category",
    )

    # Join with arcgis to get lat/lng for each school's postcode. The inner
    # join drops schools whose postcode has no exact match in the ArcGIS file.
    arcgis = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lng"),
    )
    schools = ofsted.join(arcgis, on="postcode", how="inner")
    print(f"Schools with coordinates: {len(schools):,}")

    # Every ArcGIS postcode is a counting origin. The postcode frame uses
    # "lon" while the schools frame keeps "lng" -- presumably the column
    # names count_pois_per_postcode expects; confirm against the helper.
    postcodes = arcgis.rename({"lng": "lon"})

    counts_5km = count_pois_per_postcode(
        postcodes, schools, radius_km=5, groups=SCHOOL_GROUPS
    )
    counts_2km = count_pois_per_postcode(
        postcodes, schools, radius_km=2, groups=SCHOOL_GROUPS
    )

    # NOTE(review): assumes the helper emits radius-specific column names;
    # if both frames shared names, polars would suffix the right-hand
    # duplicates with "_right" -- confirm against the helper.
    result = counts_5km.join(counts_2km, on="postcode")

    # Create the destination directory first so that a missing folder does
    # not discard the finished result at the very last step.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()