"""Tests for ``pipeline.transform.school_catchments``."""

import numpy as np
import polars as pl

from pipeline.transform.school_catchments import (
    capacity_fill_radii,
    children_per_postcode,
    classify_good_plus_schools,
    count_covering_catchments,
    equilibrium_cutoffs,
    phase_intakes,
    school_preference_bonuses,
)


def _school(phase, oeif, ungraded, urn=100000):
    """Build one inspection record with the columns the classifier is fed."""
    return {
        "URN": urn,
        "Postcode": "AA1 1AA",
        "Ofsted phase": phase,
        "Latest OEIF overall effectiveness": oeif,
        "Ungraded inspection overall outcome": ungraded,
    }


def _category_pairs(frame):
    """Collapse a classification result into a set of (urn, category) pairs."""
    return {(row["urn"], row["category"]) for row in frame.to_dicts()}


def _classify(rows):
    """Classify ``rows`` and return the resulting (urn, category) pairs."""
    return _category_pairs(classify_good_plus_schools(pl.DataFrame(rows)))


def test_legacy_oeif_grades_1_and_2_are_kept():
    """Graded results 1 and 2 map to outstanding/good for each phase."""
    schools = [
        _school("Primary", "1", None, 1),
        _school("Primary", "2", None, 2),
        _school("Secondary", "1", None, 3),
        _school("Secondary", "2", None, 4),
    ]
    expected = {
        (1, "outstanding_primary"),
        (2, "good_primary"),
        (3, "outstanding_secondary"),
        (4, "good_secondary"),
    }
    assert _classify(schools) == expected


def test_grades_3_and_4_are_excluded():
    """Graded results 3 and 4 produce no good+ category."""
    schools = [_school("Primary", "3", None), _school("Primary", "4", None)]
    assert _classify(schools) == set()


def test_ungraded_remains_good_is_recovered_when_no_graded_result():
    """Null and "Not judged" OEIF fall back to the ungraded outcome."""
    schools = [
        _school("Primary", None, "School remains Good", 1),
        _school("Secondary", "Not judged", "School remains Outstanding", 2),
        # An "(Improving)" suffix still counts as good+.
        _school("Primary", None, "School remains Good (Improving) - S5 Next", 3),
    ]
    expected = {
        (1, "good_primary"),
        (2, "outstanding_secondary"),
        (3, "good_primary"),
    }
    assert _classify(schools) == expected


def test_ungraded_concerns_are_not_good_plus():
    """ "(Concerns)" outcomes must NOT be counted as good+ schools.

    They signal issues warranting earlier re-inspection.
    """
    schools = [
        _school("Primary", None, "School remains Good (Concerns) - S5 Next", 1),
        _school(
            "Secondary",
            None,
            "School remains Outstanding (Concerns) - S5 Next",
            2,
        ),
    ]
    assert _classify(schools) == set()


def test_ungraded_non_good_outcomes_are_excluded():
    """Ungraded outcomes that are not "remains Good/Outstanding" are dropped."""
    schools = [
        _school("Primary", None, "Some aspects not as strong"),
        _school("Primary", None, "Standards maintained"),
        _school("Primary", None, None),
    ]
    assert _classify(schools) == set()


def test_genuine_grade_3_is_not_overridden_by_stale_remains_good():
    """A real grade 3 must not be promoted by an ungraded "remains Good"."""
    schools = [_school("Primary", "3", "School remains Good")]
    assert _classify(schools) == set()


def test_non_primary_secondary_phases_excluded():
    """Phases other than Primary/Secondary never yield a category."""
    schools = [
        _school("Nursery", "1", None),
        _school("Not applicable", "2", None),
    ]
    assert _classify(schools) == set()


def _aged_school(phase, oeif, low, high, urn=100000):
    """Build an inspection record that also carries the statutory age range.

    The ungraded outcome is always ``None`` for these records.
    """
    return {
        **_school(phase, oeif, None, urn),
        "Statutory lowest age": low,
        "Statutory highest age": high,
    }


def test_all_through_school_counts_toward_both_primary_and_secondary():
    """An all-through school (age 3-18) must count in BOTH metrics.

    Ofsted labels it "Secondary", but it serves primary-age children too.
    """
    schools = [_aged_school("Secondary", "2", 3, 18, 1)]
    expected = {
        (1, "good_primary"),
        (1, "good_secondary"),
    }
    assert _classify(schools) == expected


def test_age_ranges_assign_single_phase_for_standard_schools():
    """Standard age ranges map to one phase; a middle school maps to both."""
    schools = [
        _aged_school("Primary", "1", 4, 11, 1),  # primary only
        _aged_school("Secondary", "2", 11, 16, 2),  # secondary only
        _aged_school("Secondary", "1", 9, 13, 3),  # middle -> both
    ]
    expected = {
        (1, "outstanding_primary"),
        (2, "good_secondary"),
        (3, "outstanding_primary"),
        (3, "outstanding_secondary"),
    }
    assert _classify(schools) == expected


def test_closed_schools_excluded_when_open_register_given():
    """Schools missing from the open register are dropped."""
    schools = pl.DataFrame(
        [
            _aged_school("Primary", "1", 4, 11, 111),
            _aged_school("Secondary", "2", 11, 16, 222),
        ]
    )
    classified = classify_good_plus_schools(schools, open_urns={111})
    # URN 222 is not in the open register, so only URN 111 survives.
    assert _category_pairs(classified) == {(111, "outstanding_primary")}


def _gias_row(
    urn,
    type_group="Academies",
    age_range="4–11",
    pupils=210,
    capacity=None,
    admissions_policy=None,
):
    """Build one school-register row, defaulting to a 4–11 academy of 210."""
    return {
        "urn": urn,
        "name": f"School {urn}",
        "lat": 51.5,
        "lng": -0.1,
        "type_group": type_group,
        "age_range": age_range,
        "pupils": pupils,
        "capacity": capacity,
        "admissions_policy": admissions_policy,
    }


def test_phase_intakes_prorates_fill_target_over_weighted_cohorts():
    """The fill target is split between phases by weighted cohort counts."""
    schools = pl.DataFrame(
        [
            # 4-11 = cohorts 4..10, all 7 primary: full fill target.
            _gias_row(1, age_range="4–11", pupils=210),
            # 11-16 = cohorts 11..15, all 5 secondary.
            _gias_row(2, age_range="11–16", pupils=500),
            # 3-11 = cohorts 3..10; nursery year weighs 0.5, so primary
            # gets 7 of 7.5 cohort weights.
            _gias_row(3, age_range="3–11", pupils=240),
            # All-through 4-16 = cohorts 4..15: 7/12 primary, 5/12 secondary.
            _gias_row(4, age_range="4–16", pupils=1200),
            # 11-18 = cohorts 11..17; sixth-form years weigh 0.6 each, so
            # secondary gets 5 of 6.2 cohort weights.
            _gias_row(5, age_range="11–18", pupils=1240),
        ]
    )
    intakes = phase_intakes(schools).sort("urn")

    expected_primary = [210.0, 0.0, 224.0, 700.0, 0.0]
    expected_secondary = [0.0, 500.0, 0.0, 500.0, 1000.0]
    assert intakes["primary_intake"].to_list() == expected_primary
    assert intakes["secondary_intake"].to_list() == expected_secondary


def test_phase_intakes_parses_one_sided_age_ranges():
    """One-sided age ranges are parsed rather than silently dropped.

    gias._format_age_range emits "up to {high}" and "{low}+" when a statutory
    age is missing; those schools must stay in the catchment supply instead of
    being lost to a two-number parse.
    """
    schools = pl.DataFrame(
        [
            # "up to 11" = assumed cohorts 2..10: nursery years 2-3 weigh
            # 0.5 each, primary 4..10 weighs 7 -> primary 210 * 7/8.
            _gias_row(1, age_range="up to 11", pupils=210),
            # "16+" = assumed cohorts 16..18, all sixth form: no
            # primary/secondary intake, so the school contributes nothing
            # but must not crash the parse.
            _gias_row(2, age_range="16+", pupils=400),
        ]
    )
    intakes = phase_intakes(schools).sort("urn")

    assert intakes["urn"].to_list() == [1, 2]
    assert intakes["primary_intake"].to_list() == [210.0 * 7 / 8, 0.0]
    assert intakes["secondary_intake"].to_list() == [0.0, 0.0]


def test_phase_intakes_excludes_non_state_and_selective_schools():
    """Only non-selective state schools with usable data keep an intake."""
    schools = pl.DataFrame(
        [
            _gias_row(1, type_group="Independent schools"),
            _gias_row(2, type_group="Special schools"),
            _gias_row(3, type_group="Welsh schools"),
            # Grammar school intakes are test-based and region-wide; a
            # distance catchment would be fabricated.
            _gias_row(4, admissions_policy="Selective"),
            _gias_row(5, pupils=None, capacity=300),
            _gias_row(6, pupils=None, capacity=None),  # no usable headcount
            _gias_row(7, age_range=None),  # no parsable cohorts
            # Over-full school keeps its demonstrated size.
            _gias_row(8, pupils=350, capacity=300),
            _gias_row(9, admissions_policy="Non-selective"),
        ]
    )
    intakes = phase_intakes(schools).sort("urn")

    assert intakes["urn"].to_list() == [5, 8, 9]
    assert intakes["primary_intake"].to_list() == [300.0, 350.0, 210.0]


def test_school_preference_bonuses_follow_derived_grade():
    """Each school's bonus follows the grade derived from its inspection."""
    schools = [
        _school("Primary", "1", None, 1),
        _school("Primary", "2", None, 2),
        _school("Primary", "3", None, 3),
        _school("Primary", "4", None, 4),
        _school("Primary", None, "Some aspects not as strong", 5),  # unrated
        _school("Primary", "Not judged", "School remains Good", 6),
    ]
    bonus_frame = school_preference_bonuses(
        pl.DataFrame(schools), bonus_outstanding_km=1.0, bonus_good_km=0.5
    )
    bonuses = {urn: bonus for urn, bonus in bonus_frame.iter_rows()}
    assert bonuses == {1: 1.0, 2: 0.5, 3: -0.5, 4: -1.0, 5: 0.0, 6: 0.5}


def test_children_per_postcode_prorates_bands_and_splits_lsoa_evenly():
    """Age bands are prorated per phase and shared evenly within an LSOA."""
    postcodes = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "AA1 1AB", "BB2 2BB"],
            "lat": [51.5, 51.5, 52.0],
            "lng": [-0.1, -0.1, -0.2],
            "lsoa21cd": ["E01000001", "E01000001", "E01000002"],
        }
    )
    lsoa_children = pl.DataFrame(
        {
            "lsoa21": ["E01000001", "E01000002"],
            "aged_0_4": [100, 30],
            "aged_5_9": [100, 10],
            "aged_10_14": [100, 20],
            "aged_15_19": [100, 40],
        }
    )
    per_postcode = children_per_postcode(postcodes, lsoa_children).sort("postcode")

    # Primary 4-10 = 0.2*aged_0_4 + aged_5_9 + 0.2*aged_10_14: 140 split across
    # the LSOA's 2 postcodes; 20 for the single-postcode LSOA.
    assert per_postcode["primary_children"].to_list() == [70.0, 70.0, 20.0]
    # Secondary 11-15 = 0.8*aged_10_14 + 0.2*aged_15_19: 100 split across 2; 24.
    assert per_postcode["secondary_children"].to_list() == [50.0, 50.0, 24.0]


def test_equilibrium_cutoff_tightens_to_marginal_admitted_distance():
    """A full school's cutoff is the marginal admitted child's distance.

    One school with 10 places; postcodes at 1km, 2km and 3km with 5 children
    each. The two nearest postcodes exactly fill it, so the 3km postcode is
    shut out.
    """
    school_xy = np.array([[0.0, 0.0]])
    places = np.array([10.0])
    bonus_km = np.array([0.0])
    postcode_xy = np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0]])
    children = np.array([5.0, 5.0, 5.0])

    cutoffs = equilibrium_cutoffs(
        school_xy, places, bonus_km, postcode_xy, children, tau_km=0.0
    )
    assert cutoffs.tolist() == [2.0]


def test_equilibrium_rejected_demand_cascades_to_next_school():
    """Children rejected by a full school fall through to the next one.

    School A (5 places) at the origin, school B (5 places) at 10km.
    P1 (1km, 5 children) and P2 (1.5km, 5 children) both prefer A; A fills
    with P1 and tightens its cutoff to 1km, pushing P2 out to B. B never
    exceeds its target, so it keeps no binding cutoff.
    """
    school_xy = np.array([[0.0, 0.0], [10.0, 0.0]])
    places = np.array([5.0, 5.0])
    bonus_km = np.array([0.0, 0.0])
    postcode_xy = np.array([[1.0, 0.0], [1.5, 0.0]])
    children = np.array([5.0, 5.0])

    cutoffs = equilibrium_cutoffs(
        school_xy, places, bonus_km, postcode_xy, children, tau_km=0.0
    )
    assert cutoffs[0] == 1.0
    assert np.isinf(cutoffs[1])


def test_equilibrium_preference_bonus_steers_demand_to_better_school():
    """A preference bonus breaks a distance tie in favour of the better school.

    Two schools equidistant from the only postcode; school A is rated
    better (0.5km bonus) so all children choose it; B attracts nobody.
    """
    school_xy = np.array([[0.0, 0.0], [2.0, 0.0]])
    places = np.array([5.0, 5.0])
    bonus_km = np.array([0.5, 0.0])
    postcode_xy = np.array([[1.0, 0.0]])
    children = np.array([10.0])

    cutoffs = equilibrium_cutoffs(
        school_xy, places, bonus_km, postcode_xy, children, tau_km=0.0
    )
    assert cutoffs[0] == 1.0
    assert np.isinf(cutoffs[1])


def test_equilibrium_logit_choice_smears_demand_across_schools():
    """A positive temperature spreads applications over both schools.

    With a positive temperature some families prefer the further school, so
    both schools receive applications: the near school still fills and keeps
    a binding cutoff, and the far school now attracts mass it would never
    see under deterministic choice.
    """
    school_xy = np.array([[0.0, 0.0], [2.0, 0.0]])
    places = np.array([4.0, 4.0])
    bonus_km = np.array([0.0, 0.0])
    postcode_xy = np.array([[1.0, 0.0]])
    children = np.array([10.0])

    cutoffs = equilibrium_cutoffs(
        school_xy, places, bonus_km, postcode_xy, children, tau_km=1.0
    )
    # Each school gets half the 10 children (equidistant, equal utility),
    # exceeding both fill targets: both cutoffs bind at the postcode.
    assert cutoffs.tolist() == [1.0, 1.0]


def test_capacity_fill_radii_covers_fill_target_population():
    """The radius grows until the cumulative children reach the fill target.

    Unfilled school needs 6 children: postcodes at 1km (5) and 2km (5)
    cumulate past the target at 2km. A school needing more children than
    exist within the cap keeps the cap.
    """
    school_xy = np.array([[0.0, 0.0], [0.0, 0.0]])
    fill_targets = np.array([6.0, 1000.0])
    postcode_xy = np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0]])
    children = np.array([5.0, 5.0, 5.0])

    radii = capacity_fill_radii(
        school_xy, fill_targets, postcode_xy, children, max_radius_km=25.0
    )
    assert radii.tolist() == [2.0, 25.0]


def test_count_covering_catchments_respects_radius_and_validity():
    """Counts honour each school's radius and skip invalid postcodes."""
    pc_xy = np.array([[0.0, 0.0], [3.0, 0.0], [10.0, 0.0], [0.5, 0.0]])
    pc_valid = np.array([True, True, True, False])
    school_xy = np.array([[0.0, 0.0], [2.0, 0.0]])
    radii = np.array([4.0, 1.5])

    counts = count_covering_catchments(pc_xy, pc_valid, school_xy, radii, 4)
    # pc0 is inside school 0 only (school 1 is 2km away > 1.5km radius);
    # pc1 inside both; pc2 inside neither; pc3 invalid -> 0 despite proximity.
    assert counts.tolist() == [1, 2, 0, 0]


def test_count_covering_catchments_empty_schools():
    """With no schools at all, every postcode is covered by zero catchments."""
    pc_xy = np.zeros((2, 2))
    pc_valid = np.array([True, True])
    no_school_xy = np.empty((0, 2))
    no_radii = np.empty(0)

    counts = count_covering_catchments(pc_xy, pc_valid, no_school_xy, no_radii, 2)
    assert counts.tolist() == [0, 0]