"""Compute good-rated school proximity counts per postcode.""" import argparse from pathlib import Path import polars as pl from pipeline.utils.poi_counts import count_pois_per_postcode SCHOOL_GROUPS = { "good_primary": ["good_primary"], "good_secondary": ["good_secondary"], } def main(): parser = argparse.ArgumentParser( description="Count good+ primary/secondary schools within 2km per postcode" ) parser.add_argument( "--ofsted", type=Path, required=True, help="Ofsted inspection parquet" ) parser.add_argument( "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet" ) parser.add_argument( "--output", type=Path, required=True, help="Output parquet path" ) args = parser.parse_args() # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools ofsted = pl.read_parquet(args.ofsted).filter( pl.col("Ofsted phase").is_in(["Primary", "Secondary"]) & pl.col("Overall effectiveness").is_in(["1", "2"]) ) print(f"Good+ schools: {len(ofsted):,}") # Assign category based on phase ofsted = ofsted.with_columns( pl.when(pl.col("Ofsted phase") == "Primary") .then(pl.lit("good_primary")) .otherwise(pl.lit("good_secondary")) .alias("category") ).select( pl.col("Postcode").alias("postcode"), "category", ) # Join with arcgis to get lat/lng for each school's postcode arcgis = pl.read_parquet(args.arcgis).select( pl.col("pcds").alias("postcode"), "lat", pl.col("long").alias("lng"), ) schools = ofsted.join(arcgis, on="postcode", how="inner") print(f"Schools with coordinates: {len(schools):,}") # Load all postcodes for proximity counting postcodes = arcgis.rename({"lng": "lon"}) counts_5km = count_pois_per_postcode( postcodes, schools, radius_km=5, groups=SCHOOL_GROUPS ) counts_2km = count_pois_per_postcode( postcodes, schools, radius_km=2, groups=SCHOOL_GROUPS ) result = counts_5km.join(counts_2km, on="postcode") result.write_parquet(args.output) size_mb = args.output.stat().st_size / (1024 * 1024) print(f"Wrote {args.output} ({size_mb:.1f} MB)") if __name__ == "__main__": main()