alright
This commit is contained in:
parent
c645b0f1d4
commit
39ef5c6646
79 changed files with 5660 additions and 2199 deletions
|
|
@ -1316,17 +1316,122 @@ def transform_grocery_retail_points(
|
|||
).select("id", "name", "category", "icon_category", "group", "lat", "lng", "emoji")
|
||||
|
||||
|
||||
def transform_gias_schools(gias_path: Path) -> pl.LazyFrame:
|
||||
"""Convert the GIAS register parquet into POI rows with school metadata."""
|
||||
return pl.scan_parquet(gias_path).select(
|
||||
# Emoji for each school icon category. The keys are exactly the labels that
# _school_icon_category_expr() can emit, "School" being its catch-all bucket.
SCHOOL_ICON_CATEGORIES: dict[str, str] = {
    # Buckets normally chosen from the GIAS phase (or the age-range fallback).
    "Nursery school": "🧸",
    "Primary school": "🎒",
    "Secondary school": "🏫",
    "All-through school": "🏫",
    "Sixth form": "📚",
    # Buckets chosen from type_group, regardless of phase.
    "Further education college": "📚",
    "University": "🎓",
    "Special school": "🤝",
    # Generic bucket used when no other rule matches.
    "School": "🏫",
}
|
||||
|
||||
|
||||
def _school_icon_category_expr() -> pl.Expr:
    """Build the expression that assigns each GIAS row an icon category.

    Rules are tried in order and the first match wins:

    1. ``type_group`` decides universities, special schools and colleges,
       because those span multiple phases.
    2. Otherwise the lower-cased ``phase`` picks the bucket.
    3. Rows still unmatched (e.g. independent and other non-statutory
       schools where GIAS leaves ``phase`` null) fall back to the
       ``age_range`` bounds so they still split into the right pill.
    4. Anything left over becomes the generic ``"School"``.
    """
    type_group = pl.col("type_group")
    # GIAS phase mixes casing ("Middle deemed Primary" vs "Middle deemed
    # primary"), so every comparison is made on the lower-cased value.
    phase_lc = pl.col("phase").str.to_lowercase()
    # age_range is "<min>–<max>" separated by an en dash (U+2013); either
    # bound may be missing, hence the non-strict casts.
    bounds = pl.col("age_range").str.split_exact("–", 1)
    youngest = bounds.struct.field("field_0").cast(pl.Int32, strict=False)
    oldest = bounds.struct.field("field_1").cast(pl.Int32, strict=False)

    # Ordered (condition, label) pairs; earlier entries take precedence.
    rules = [
        # type_group overrides.
        (type_group == "Universities", "University"),
        (type_group == "Special schools", "Special school"),
        (type_group == "Colleges", "Further education college"),
        # Phase buckets.
        (phase_lc == "nursery", "Nursery school"),
        (phase_lc.is_in(["primary", "middle deemed primary"]), "Primary school"),
        (
            phase_lc.is_in(["secondary", "middle deemed secondary"]),
            "Secondary school",
        ),
        (phase_lc == "all-through", "All-through school"),
        (phase_lc.is_in(["16 plus", "sixth form"]), "Sixth form"),
        # Age-range fallback for null-phase rows (≈3k Independents + Academies
        # GIAS doesn't classify by phase).
        (oldest <= 5, "Nursery school"),
        (youngest >= 16, "Sixth form"),
        ((youngest <= 6) & (oldest >= 16), "All-through school"),
        (oldest <= 11, "Primary school"),
        (youngest >= 10, "Secondary school"),
    ]

    (first_cond, first_label), *remaining = rules
    chain = pl.when(first_cond).then(pl.lit(first_label))
    for cond, label in remaining:
        chain = chain.when(cond).then(pl.lit(label))
    return chain.otherwise(pl.lit("School"))
|
||||
|
||||
|
||||
# Ofsted OEIF overall-effectiveness grade code -> conventional label.
# Annotated like SCHOOL_ICON_CATEGORIES so both lookup tables read the same.
OFSTED_OEIF_LABELS: dict[str, str] = {
    "1": "Outstanding",
    "2": "Good",
    "3": "Requires improvement",
    "4": "Inadequate",
}
|
||||
|
||||
|
||||
def _load_ofsted_ratings(ofsted_path: Path) -> pl.LazyFrame:
    """Load per-URN Ofsted ratings as human-readable labels.

    Reads the "Latest OEIF overall effectiveness" column from the parquet at
    ``ofsted_path`` and maps it to a label: grade codes are translated via
    ``OFSTED_OEIF_LABELS`` (1-4 -> the conventional Ofsted labels) and
    "Not judged" (post-2025 reform schools that only have a report card) is
    preserved verbatim. Any other value, including null, becomes null and the
    row is dropped.

    Args:
        ofsted_path: Path to the Ofsted latest-inspections parquet file.

    Returns:
        A LazyFrame with columns ``urn`` (Int64) and ``ofsted_rating``
        (String), keyed by URN so it can be joined onto the GIAS register.
    """
    # Drive the translation from OFSTED_OEIF_LABELS instead of repeating each
    # grade in a when/then chain, so the table is the single source of truth.
    grade_to_label = {**OFSTED_OEIF_LABELS, "Not judged": "Not judged"}
    rating = pl.col("Latest OEIF overall effectiveness").replace_strict(
        grade_to_label,
        default=None,  # unmapped or null grades -> null, filtered out below
        return_dtype=pl.String,
    )
    # NOTE(review): assumes the Ofsted file has at most one row per URN;
    # duplicate URNs would fan out rows when joined — confirm upstream.
    return (
        pl.scan_parquet(ofsted_path)
        .select(
            pl.col("URN").cast(pl.Int64).alias("urn"),
            rating.alias("ofsted_rating"),
        )
        .filter(pl.col("ofsted_rating").is_not_null())
    )
|
||||
|
||||
|
||||
def transform_gias_schools(gias_path: Path, ofsted_path: Path) -> pl.LazyFrame:
|
||||
"""Convert the GIAS register parquet into POI rows with school metadata.
|
||||
Ofsted ratings are joined by URN so each school carries its latest OEIF
|
||||
overall effectiveness grade (Outstanding/Good/Requires improvement/
|
||||
Inadequate/Not judged), surfaced in the map popup."""
|
||||
icon_category_expr = _school_icon_category_expr()
|
||||
emoji_expr = icon_category_expr.replace_strict(SCHOOL_ICON_CATEGORIES)
|
||||
ofsted = _load_ofsted_ratings(ofsted_path)
|
||||
# category mirrors icon_category so the dashboard renders one toggle per
|
||||
# school type (Nursery / Primary / Secondary / Sixth form / University /…)
|
||||
# instead of bundling every GIAS row under a single "School" pill.
|
||||
return pl.scan_parquet(gias_path).join(ofsted, on="urn", how="left").select(
|
||||
pl.concat_str([pl.lit("gias-"), pl.col("urn").cast(pl.String)]).alias("id"),
|
||||
pl.col("name"),
|
||||
pl.lit("School").alias("category"),
|
||||
pl.lit("School").alias("icon_category"),
|
||||
icon_category_expr.alias("category"),
|
||||
icon_category_expr.alias("icon_category"),
|
||||
pl.lit("Education").alias("group"),
|
||||
pl.col("lat").cast(pl.Float64),
|
||||
pl.col("lng").cast(pl.Float64),
|
||||
pl.lit("🏫").alias("emoji"),
|
||||
emoji_expr.alias("emoji"),
|
||||
pl.col("phase").alias("school_phase"),
|
||||
pl.col("type").alias("school_type"),
|
||||
pl.col("type_group").alias("school_type_group"),
|
||||
|
|
@ -1346,6 +1451,7 @@ def transform_gias_schools(gias_path: Path) -> pl.LazyFrame:
|
|||
pl.col("website").alias("school_website"),
|
||||
pl.col("telephone").cast(pl.String, strict=False).alias("school_telephone"),
|
||||
pl.col("head_name").alias("school_head_name"),
|
||||
pl.col("ofsted_rating").alias("school_ofsted_rating"),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1355,6 +1461,7 @@ def transform(
|
|||
boundary_path: Path,
|
||||
grocery_retail_points_path: Path,
|
||||
gias_path: Path,
|
||||
ofsted_path: Path,
|
||||
) -> pl.LazyFrame:
|
||||
lf = pl.scan_parquet(input_path)
|
||||
|
||||
|
|
@ -1420,7 +1527,12 @@ def transform(
|
|||
|
||||
grocery_df = pl.read_parquet(grocery_retail_points_path)
|
||||
grocery_pois = transform_grocery_retail_points(grocery_df, boundary_path)
|
||||
frames = [lf, naptan, grocery_pois.lazy(), transform_gias_schools(gias_path)]
|
||||
frames = [
|
||||
lf,
|
||||
naptan,
|
||||
grocery_pois.lazy(),
|
||||
transform_gias_schools(gias_path, ofsted_path),
|
||||
]
|
||||
|
||||
return pl.concat(frames, how="diagonal_relaxed")
|
||||
|
||||
|
|
@ -1453,6 +1565,12 @@ def main():
|
|||
required=True,
|
||||
help="GIAS schools register parquet (replaces OSM schools)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ofsted",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Ofsted latest-inspections parquet (provides per-URN ratings)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output filtered POIs parquet file"
|
||||
)
|
||||
|
|
@ -1464,6 +1582,7 @@ def main():
|
|||
args.boundary,
|
||||
args.grocery_retail_points,
|
||||
args.gias,
|
||||
args.ofsted,
|
||||
).collect(engine="streaming")
|
||||
|
||||
df.write_parquet(args.output)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue