"""Compute good-rated school proximity counts per postcode."""
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
|
|
from pipeline.utils.poi_counts import count_pois_per_postcode
|
|
|
|
# Each group is keyed by, and contains only, its own category label. The labels
# match the "category" values that main() assigns to schools.
# NOTE(review): presumably each key becomes an output count that aggregates the
# listed categories; confirm against count_pois_per_postcode.
SCHOOL_GROUPS = {
    label: [label] for label in ("good_primary", "good_secondary")
}
|
|
|
|
|
|
def main() -> None:
    """Count good+ primary/secondary schools near each postcode and save the result.

    Command-line arguments:
        --ofsted: Ofsted inspection parquet. The columns "Ofsted phase",
            "Overall effectiveness" and "Postcode" are read from it.
        --arcgis: ArcGIS postcode parquet. The columns "pcds", "lat" and
            "long" are read from it.
        --output: path the resulting parquet file is written to.

    Schools are located by joining their postcode against the ArcGIS table;
    schools whose postcode has no match there are dropped by the inner join.
    Progress counts and the size of the written file are printed to stdout.
    """
    # Single source of truth for the search radius, so the CLI help text cannot
    # drift from the value actually passed to count_pois_per_postcode.
    # NOTE(review): the help text used to say "2km" while the code used 5; the
    # effective behaviour (5 km) is kept here. Confirm the intended radius.
    radius_km = 5

    parser = argparse.ArgumentParser(
        description=(
            "Count good+ primary/secondary schools "
            f"within {radius_km}km per postcode"
        )
    )
    parser.add_argument(
        "--ofsted", type=Path, required=True, help="Ofsted inspection parquet"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args()

    # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools.
    # The ratings are compared as strings, so the column is assumed to hold
    # "1"/"2" text values rather than integers.
    ofsted = pl.read_parquet(args.ofsted).filter(
        pl.col("Ofsted phase").is_in(["Primary", "Secondary"])
        & pl.col("Overall effectiveness").is_in(["1", "2"])
    )

    print(f"Good+ schools: {len(ofsted):,}")

    # Assign category based on phase. Only Primary/Secondary rows survive the
    # filter above, so everything that is not Primary is Secondary.
    ofsted = ofsted.with_columns(
        pl.when(pl.col("Ofsted phase") == "Primary")
        .then(pl.lit("good_primary"))
        .otherwise(pl.lit("good_secondary"))
        .alias("category")
    ).select(
        pl.col("Postcode").alias("postcode"),
        "category",
    )

    # Join with arcgis to get lat/lng for each school's postcode.
    arcgis = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lng"),
    )

    # Inner join: schools whose postcode is absent from arcgis are dropped.
    schools = ofsted.join(arcgis, on="postcode", how="inner")
    print(f"Schools with coordinates: {len(schools):,}")

    # Load all postcodes for proximity counting.
    # NOTE(review): the postcode frame uses "lon" while the schools frame keeps
    # "lng"; presumably count_pois_per_postcode expects exactly these names.
    postcodes = arcgis.rename({"lng": "lon"})

    result = count_pois_per_postcode(
        postcodes, schools, radius_km=radius_km, groups=SCHOOL_GROUPS
    )

    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|