import polars as pl

from pipeline.transform.school_proximity import classify_good_plus_schools


def _school(phase, oeif, ungraded, postcode="AA1 1AA"):
    """Build one raw school row using the input column names."""
    row = {"Postcode": postcode, "Ofsted phase": phase}
    row["Latest OEIF overall effectiveness"] = oeif
    row["Ungraded inspection overall outcome"] = ungraded
    return row


def _classify(rows):
    """Run the classifier on ``rows``; return the set of (postcode, category) pairs."""
    classified = classify_good_plus_schools(pl.DataFrame(rows))
    pairs = set()
    for record in classified.to_dicts():
        pairs.add((record["postcode"], record["category"]))
    return pairs


def test_legacy_oeif_grades_1_and_2_are_kept():
    """OEIF grade 1 maps to outstanding and grade 2 to good, for both phases."""
    schools = [
        _school("Primary", "1", None, "AA1 1AA"),
        _school("Primary", "2", None, "AA1 1AB"),
        _school("Secondary", "1", None, "AA1 1AC"),
        _school("Secondary", "2", None, "AA1 1AD"),
    ]
    expected = {
        ("AA1 1AA", "outstanding_primary"),
        ("AA1 1AB", "good_primary"),
        ("AA1 1AC", "outstanding_secondary"),
        ("AA1 1AD", "good_secondary"),
    }
    assert _classify(schools) == expected


def test_grades_3_and_4_are_excluded():
    """Schools graded 3 or 4 produce no output rows."""
    schools = [_school("Primary", grade, None) for grade in ("3", "4")]
    assert _classify(schools) == set()


def test_ungraded_remains_good_is_recovered_when_no_graded_result():
    """A null or "Not judged" OEIF grade falls back to the ungraded outcome."""
    schools = [
        _school("Primary", None, "School remains Good", "AA1 1AA"),
        _school("Secondary", "Not judged", "School remains Outstanding", "AA1 1AB"),
        # An "(Improving)" suffix still counts as good+.
        _school("Primary", None, "School remains Good (Improving) - S5 Next", "AA1 1AE"),
    ]
    expected = {
        ("AA1 1AA", "good_primary"),
        ("AA1 1AB", "outstanding_secondary"),
        ("AA1 1AE", "good_primary"),
    }
    assert _classify(schools) == expected


def test_ungraded_concerns_are_not_good_plus():
    """"(Concerns)" outcomes must not be counted as good+ schools.

    They signal issues warranting earlier re-inspection.
    """
    schools = [
        _school("Primary", None, "School remains Good (Concerns) - S5 Next", "AA1 1AC"),
        _school(
            "Secondary",
            None,
            "School remains Outstanding (Concerns) - S5 Next",
            "AA1 1AD",
        ),
    ]
    assert _classify(schools) == set()


def test_ungraded_non_good_outcomes_are_excluded():
    """Ungraded outcomes other than "remains Good/Outstanding" yield nothing."""
    outcomes = ("Some aspects not as strong", "Standards maintained", None)
    schools = [_school("Primary", None, outcome) for outcome in outcomes]
    assert _classify(schools) == set()


def test_genuine_grade_3_is_not_overridden_by_stale_remains_good():
    """A real grade 3 must not be promoted by an ungraded "remains Good"."""
    schools = [_school("Primary", "3", "School remains Good")]
    assert _classify(schools) == set()


def test_non_primary_secondary_phases_excluded():
    """Phases other than Primary/Secondary are dropped even with good grades."""
    schools = [
        _school("Nursery", "1", None),
        _school("Not applicable", "2", None),
    ]
    assert _classify(schools) == set()


def _aged_school(phase, oeif, low, high, postcode="AA1 1AA"):
    """Build a school row that also carries a URN and statutory age range.

    The ungraded outcome is always ``None`` and the URN defaults to 100000;
    tests that care about the URN overwrite it on the returned dict.
    """
    row = _school(phase, oeif, None, postcode)
    row["URN"] = 100000
    row["Statutory lowest age"] = low
    row["Statutory highest age"] = high
    return row


def test_all_through_school_counts_toward_both_primary_and_secondary():
    """An all-through school (age 3-18) counts in BOTH metrics.

    Its Ofsted phase is "Secondary", but it serves primary-age children too.
    """
    schools = [_aged_school("Secondary", "2", 3, 18, "AA1 1AA")]
    expected = {
        ("AA1 1AA", "good_primary"),
        ("AA1 1AA", "good_secondary"),
    }
    assert _classify(schools) == expected


def test_age_ranges_assign_single_phase_for_standard_schools():
    """Standard age ranges map to one phase; a middle school maps to both."""
    schools = [
        _aged_school("Primary", "1", 4, 11, "AA1 1AA"),  # primary only
        _aged_school("Secondary", "2", 11, 16, "AA1 1AB"),  # secondary only
        _aged_school("Secondary", "1", 9, 13, "AA1 1AC"),  # middle -> both
    ]
    expected = {
        ("AA1 1AA", "outstanding_primary"),
        ("AA1 1AB", "good_secondary"),
        ("AA1 1AC", "outstanding_primary"),
        ("AA1 1AC", "outstanding_secondary"),
    }
    assert _classify(schools) == expected


def test_closed_schools_excluded_when_open_register_given():
    """With ``open_urns`` supplied, schools whose URN is absent are dropped."""
    listed_school = _aged_school("Primary", "1", 4, 11, "AA1 1AA")
    listed_school["URN"] = 111
    unlisted_school = _aged_school("Secondary", "2", 11, 16, "AA1 1AB")
    unlisted_school["URN"] = 222

    frame = pl.DataFrame([listed_school, unlisted_school])
    result = classify_good_plus_schools(frame, open_urns={111})
    pairs = {(record["postcode"], record["category"]) for record in result.to_dicts()}

    # URN 222 is not in the open register, so it is dropped.
    assert pairs == {("AA1 1AA", "outstanding_primary")}