"""Tests for the school-catchment transforms in ``pipeline.transform.school_catchments``."""

import numpy as np
import polars as pl

from pipeline.transform.school_catchments import (
    capacity_fill_radii,
    children_per_postcode,
    classify_good_plus_schools,
    count_covering_catchments,
    equilibrium_cutoffs,
    phase_intakes,
    school_preference_bonuses,
)


def _school(phase, oeif, ungraded, urn=100000):
    """Build one inspection-record row dict for the classification tests.

    Args:
        phase: Value stored under "Ofsted phase" (e.g. "Primary").
        oeif: Value stored under "Latest OEIF overall effectiveness"; the
            tests pass grade strings such as "1", "Not judged", or None.
        ungraded: Value stored under "Ungraded inspection overall outcome",
            or None.
        urn: School identifier stored under "URN".

    Returns:
        A new dict with the given fields and a fixed placeholder postcode.
    """
    return {
        "URN": urn,
        "Postcode": "AA1 1AA",
        "Ofsted phase": phase,
        "Latest OEIF overall effectiveness": oeif,
        "Ungraded inspection overall outcome": ungraded,
    }


def _classify(rows):
    """Classify ``rows`` and return the result as a set of ``(urn, category)``.

    ``rows`` is a list of row dicts; it is wrapped in a ``pl.DataFrame`` and
    passed to ``classify_good_plus_schools``. Returning a set keeps the
    assertions independent of output row order.
    """
    result = classify_good_plus_schools(pl.DataFrame(rows))
    return {(r["urn"], r["category"]) for r in result.to_dicts()}


def test_legacy_oeif_grades_1_and_2_are_kept():
    """OEIF grades "1" and "2" map to outstanding/good for both phases."""
    rows = [
        _school("Primary", "1", None, 1),
        _school("Primary", "2", None, 2),
        _school("Secondary", "1", None, 3),
        _school("Secondary", "2", None, 4),
    ]
    assert _classify(rows) == {
        (1, "outstanding_primary"),
        (2, "good_primary"),
        (3, "outstanding_secondary"),
        (4, "good_secondary"),
    }


def test_grades_3_and_4_are_excluded():
    """OEIF grades "3" and "4" produce no good+ classification rows."""
    rows = [_school("Primary", "3", None), _school("Primary", "4", None)]
    assert _classify(rows) == set()


def test_ungraded_remains_good_is_recovered_when_no_graded_result():
    """A "School remains ..." ungraded outcome is used when OEIF has no grade."""
    # Null and "Not judged" OEIF fall back to the ungraded outcome.
    rows = [
        _school("Primary", None, "School remains Good", 1),
        _school("Secondary", "Not judged", "School remains Outstanding", 2),
        # "(Improving)" is still good+ ...
        _school("Primary", None, "School remains Good (Improving) - S5 Next", 3),
    ]
    assert _classify(rows) == {
        (1, "good_primary"),
        (2, "outstanding_secondary"),
        (3, "good_primary"),
    }


def test_ungraded_concerns_are_not_good_plus():
    """Ungraded "(Concerns)" outcomes are excluded from the good+ set."""
    # "(Concerns)" outcomes signal issues warranting earlier re-inspection and
    # must NOT be counted as good+ schools.
    rows = [
        _school("Primary", None, "School remains Good (Concerns) - S5 Next", 1),
        _school(
            "Secondary",
            None,
            "School remains Outstanding (Concerns) - S5 Next",
            2,
        ),
    ]
    assert _classify(rows) == set()


def test_ungraded_non_good_outcomes_are_excluded():
    """Ungraded outcomes other than "School remains ..." (or None) are dropped."""
    rows = [
        _school("Primary", None, "Some aspects not as strong"),
        _school("Primary", None, "Standards maintained"),
        _school("Primary", None, None),
    ]
    assert _classify(rows) == set()


def test_genuine_grade_3_is_not_overridden_by_stale_remains_good():
    """A graded "3" wins over a co-present ungraded "School remains Good"."""
    # A real grade 3 must not be promoted by an ungraded "remains Good".
    rows = [_school("Primary", "3", "School remains Good")]
    assert _classify(rows) == set()


def test_non_primary_secondary_phases_excluded():
    """Phases other than Primary/Secondary yield no rows, even when graded 1/2."""
    rows = [
        _school("Nursery", "1", None),
        _school("Not applicable", "2", None),
    ]
    assert _classify(rows) == set()


def _aged_school(phase, oeif, low, high, urn=100000):
    """Build an inspection-record row dict that also carries statutory ages.

    Args:
        phase: Value stored under "Ofsted phase".
        oeif: Value stored under "Latest OEIF overall effectiveness".
        low: Value stored under "Statutory lowest age".
        high: Value stored under "Statutory highest age".
        urn: School identifier stored under "URN".

    Returns:
        A new dict with the given fields, a fixed placeholder postcode and
        the ungraded outcome always set to None.
    """
    return {
        "URN": urn,
        "Postcode": "AA1 1AA",
        "Ofsted phase": phase,
        "Latest OEIF overall effectiveness": oeif,
        "Ungraded inspection overall outcome": None,
        "Statutory lowest age": low,
        "Statutory highest age": high,
    }


def test_all_through_school_counts_toward_both_primary_and_secondary():
    """A 3-18 school labelled "Secondary" is classified in both phases."""
    # An all-through school (age 3-18) is labelled "Secondary" by Ofsted phase but
    # serves primary-age children too, so it must count in BOTH metrics.
    rows = [_aged_school("Secondary", "2", 3, 18, 1)]
    assert _classify(rows) == {
        (1, "good_primary"),
        (1, "good_secondary"),
    }


def test_age_ranges_assign_single_phase_for_standard_schools():
    """Statutory age ranges decide which phase(s) a school is counted in.

    The 4-11 and 11-16 schools land in exactly one phase each; the 9-13
    middle school is expected in both, despite its "Secondary" phase label.
    """
    rows = [
        _aged_school("Primary", "1", 4, 11, 1),  # primary only
        _aged_school("Secondary", "2", 11, 16, 2),  # secondary only
        _aged_school("Secondary", "1", 9, 13, 3),  # middle -> both
    ]
    assert _classify(rows) == {
        (1, "outstanding_primary"),
        (2, "good_secondary"),
        (3, "outstanding_primary"),
        (3, "outstanding_secondary"),
    }


def test_closed_schools_excluded_when_open_register_given():
    """Passing ``open_urns`` drops schools whose URN is not in that set."""
    rows = [
        _aged_school("Primary", "1", 4, 11, 111),
        _aged_school("Secondary", "2", 11, 16, 222),
    ]
    result = classify_good_plus_schools(pl.DataFrame(rows), open_urns={111})
    pairs = {(r["urn"], r["category"]) for r in result.to_dicts()}
    # URN 222 is not in the open register, so it is dropped.
    assert pairs == {(111, "outstanding_primary")}


def _gias_row(
    urn,
    type_group="Academies",
    age_range="4–11",
    pupils=210,
    capacity=None,
    admissions_policy=None,
):
    """Build one school-register row dict for the ``phase_intakes`` tests.

    Args:
        urn: School identifier; also used to derive the "name" field.
        type_group: Value stored under "type_group".
        age_range: Age-range string stored under "age_range" (the tests use
            an en dash separator, e.g. "4–11"), or None.
        pupils: Value stored under "pupils", or None.
        capacity: Value stored under "capacity", or None.
        admissions_policy: Value stored under "admissions_policy", or None.

    Returns:
        A new dict with the given fields plus a fixed lat/lng.
    """
    return {
        "urn": urn,
        "name": f"School {urn}",
        "lat": 51.5,
        "lng": -0.1,
        "type_group": type_group,
        "age_range": age_range,
        "pupils": pupils,
        "capacity": capacity,
        "admissions_policy": admissions_policy,
    }


def test_phase_intakes_prorates_fill_target_over_weighted_cohorts():
    """Each school's headcount is split between phases by weighted cohorts."""
    intakes = phase_intakes(
        pl.DataFrame(
            [
                # 4-11 = cohorts 4..10, all 7 primary: full fill target.
                _gias_row(1, age_range="4–11", pupils=210),
                # 11-16 = cohorts 11..15, all 5 secondary.
                _gias_row(2, age_range="11–16", pupils=500),
                # 3-11 = cohorts 3..10; nursery year weighs 0.5, so primary
                # gets 7 of 7.5 cohort weights.
                _gias_row(3, age_range="3–11", pupils=240),
                # All-through 4-16 = cohorts 4..15: 7/12 primary, 5/12 secondary.
                _gias_row(4, age_range="4–16", pupils=1200),
                # 11-18 = cohorts 11..17; sixth-form years weigh 0.6 each, so
                # secondary gets 5 of 6.2 cohort weights.
                _gias_row(5, age_range="11–18", pupils=1240),
            ]
        )
    ).sort("urn")
    assert intakes["primary_intake"].to_list() == [210.0, 0.0, 224.0, 700.0, 0.0]
    assert intakes["secondary_intake"].to_list() == [0.0, 500.0, 0.0, 500.0, 1000.0]


def test_phase_intakes_excludes_non_state_and_selective_schools():
    """Only usable, non-selective rows survive, with the expected headcount.

    Of the nine rows, only URNs 5, 8 and 9 are expected in the output: a
    capacity-only school, an over-full school and an explicitly
    non-selective school.
    """
    intakes = phase_intakes(
        pl.DataFrame(
            [
                _gias_row(1, type_group="Independent schools"),
                _gias_row(2, type_group="Special schools"),
                _gias_row(3, type_group="Welsh schools"),
                # Grammar school intakes are test-based and region-wide; a
                # distance catchment would be fabricated.
                _gias_row(4, admissions_policy="Selective"),
                _gias_row(5, pupils=None, capacity=300),
                _gias_row(6, pupils=None, capacity=None),  # no usable headcount
                _gias_row(7, age_range=None),  # no parsable cohorts
                # Over-full school keeps its demonstrated size.
                _gias_row(8, pupils=350, capacity=300),
                _gias_row(9, admissions_policy="Non-selective"),
            ]
        )
    ).sort("urn")
    assert intakes["urn"].to_list() == [5, 8, 9]
    assert intakes["primary_intake"].to_list() == [300.0, 350.0, 210.0]


def test_school_preference_bonuses_follow_derived_grade():
    """Each school's bonus follows its grade, including ungraded fallbacks.

    With ``bonus_outstanding_km=1.0`` and ``bonus_good_km=0.5`` the expected
    bonuses are +1.0 / +0.5 for grades 1 / 2, -0.5 / -1.0 for grades 3 / 4,
    0.0 for an unrated school, and +0.5 for a "Not judged" school whose
    ungraded outcome is "School remains Good".
    """
    # _school() already returns a fresh dict per call, so no copy is needed.
    rows = [
        _school("Primary", "1", None, 1),
        _school("Primary", "2", None, 2),
        _school("Primary", "3", None, 3),
        _school("Primary", "4", None, 4),
        _school("Primary", None, "Some aspects not as strong", 5),  # unrated
        _school("Primary", "Not judged", "School remains Good", 6),
    ]
    bonuses = dict(
        school_preference_bonuses(
            pl.DataFrame(rows), bonus_outstanding_km=1.0, bonus_good_km=0.5
        ).iter_rows()
    )
    assert bonuses == {1: 1.0, 2: 0.5, 3: -0.5, 4: -1.0, 5: 0.0, 6: 0.5}


def test_children_per_postcode_prorates_bands_and_splits_lsoa_evenly():
    """LSOA age bands are prorated to phases and split evenly over postcodes."""
    postcodes = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "AA1 1AB", "BB2 2BB"],
            "lat": [51.5, 51.5, 52.0],
            "lng": [-0.1, -0.1, -0.2],
            "lsoa21cd": ["E01000001", "E01000001", "E01000002"],
        }
    )
    lsoa_children = pl.DataFrame(
        {
            "lsoa21": ["E01000001", "E01000002"],
            "aged_0_4": [100, 30],
            "aged_5_9": [100, 10],
            "aged_10_14": [100, 20],
            "aged_15_19": [100, 40],
        }
    )
    result = children_per_postcode(postcodes, lsoa_children).sort("postcode")
    # Primary 4-10 = 0.2*aged_0_4 + aged_5_9 + 0.2*aged_10_14: 140 split across
    # the LSOA's 2 postcodes; 20 for the single-postcode LSOA.
    assert result["primary_children"].to_list() == [70.0, 70.0, 20.0]
    # Secondary 11-15 = 0.8*aged_10_14 + 0.2*aged_15_19: 100 split across 2; 24.
    assert result["secondary_children"].to_list() == [50.0, 50.0, 24.0]


def test_equilibrium_cutoff_tightens_to_marginal_admitted_distance():
    """An oversubscribed school's cutoff is the last admitted child's distance."""
    # One school with 10 places; postcodes at 1km, 2km and 3km with 5 children
    # each. The two nearest postcodes exactly fill it, so the cutoff is the
    # marginal admitted child's distance and the 3km postcode is shut out.
    cutoffs = equilibrium_cutoffs(
        np.array([[0.0, 0.0]]),
        np.array([10.0]),
        np.array([0.0]),
        np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0]]),
        np.array([5.0, 5.0, 5.0]),
        tau_km=0.0,
    )
    assert cutoffs.tolist() == [2.0]


def test_equilibrium_rejected_demand_cascades_to_next_school():
    """Children shut out of a full school move on to the next school."""
    # School A (5 places) at the origin, school B (5 places) at 10km.
    # P1 (1km, 5 children) and P2 (1.5km, 5 children) both prefer A; A fills
    # with P1 and tightens its cutoff to 1km, pushing P2 out to B. B never
    # exceeds its target, so it keeps no binding cutoff.
    cutoffs = equilibrium_cutoffs(
        np.array([[0.0, 0.0], [10.0, 0.0]]),
        np.array([5.0, 5.0]),
        np.array([0.0, 0.0]),
        np.array([[1.0, 0.0], [1.5, 0.0]]),
        np.array([5.0, 5.0]),
        tau_km=0.0,
    )
    assert cutoffs[0] == 1.0
    assert np.isinf(cutoffs[1])


def test_equilibrium_preference_bonus_steers_demand_to_better_school():
    """A preference bonus breaks a distance tie in favour of the better school."""
    # Two schools equidistant from the only postcode; school A is rated
    # better (0.5km bonus) so all children choose it; B attracts nobody.
    cutoffs = equilibrium_cutoffs(
        np.array([[0.0, 0.0], [2.0, 0.0]]),
        np.array([5.0, 5.0]),
        np.array([0.5, 0.0]),
        np.array([[1.0, 0.0]]),
        np.array([10.0]),
        tau_km=0.0,
    )
    assert cutoffs[0] == 1.0
    assert np.isinf(cutoffs[1])


def test_equilibrium_logit_choice_smears_demand_across_schools():
    """With ``tau_km=1.0`` both schools receive demand and both cutoffs bind."""
    # With a positive temperature, demand is smeared across schools. Both
    # schools here sit 1km from the only postcode and carry no bonus, so
    # neither is "nearer": applications are shared between them rather than
    # all landing on a single school.
    cutoffs = equilibrium_cutoffs(
        np.array([[0.0, 0.0], [2.0, 0.0]]),
        np.array([4.0, 4.0]),
        np.array([0.0, 0.0]),
        np.array([[1.0, 0.0]]),
        np.array([10.0]),
        tau_km=1.0,
    )
    # Each school gets half the 10 children (equidistant, equal utility),
    # exceeding both fill targets: both cutoffs bind at the postcode.
    assert cutoffs.tolist() == [1.0, 1.0]


def test_capacity_fill_radii_covers_fill_target_population():
    """The radius grows until the fill target is covered, up to the cap."""
    # Unfilled school needs 6 children: postcodes at 1km (5) and 2km (5)
    # cumulate past the target at 2km. A school needing more children than
    # exist within the cap keeps the cap.
    radii = capacity_fill_radii(
        np.array([[0.0, 0.0], [0.0, 0.0]]),
        np.array([6.0, 1000.0]),
        np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0]]),
        np.array([5.0, 5.0, 5.0]),
        max_radius_km=25.0,
    )
    assert radii.tolist() == [2.0, 25.0]


def test_count_covering_catchments_respects_radius_and_validity():
    """Counts include only schools within radius, and are 0 for invalid rows."""
    pc_xy = np.array([[0.0, 0.0], [3.0, 0.0], [10.0, 0.0], [0.5, 0.0]])
    pc_valid = np.array([True, True, True, False])
    school_xy = np.array([[0.0, 0.0], [2.0, 0.0]])
    radii = np.array([4.0, 1.5])
    counts = count_covering_catchments(pc_xy, pc_valid, school_xy, radii, 4)
    # pc0 is inside school 0 only (school 1 is 2km away > 1.5km radius);
    # pc1 inside both; pc2 inside neither; pc3 invalid -> 0 despite proximity.
    assert counts.tolist() == [1, 2, 0, 0]


def test_count_covering_catchments_empty_schools():
    """With no schools at all, every postcode gets a count of 0."""
    counts = count_covering_catchments(
        np.zeros((2, 2)), np.array([True, True]), np.empty((0, 2)), np.empty(0), 2
    )
    assert counts.tolist() == [0, 0]