139 lines
4.6 KiB
Python
139 lines
4.6 KiB
Python
import polars as pl
|
|
|
|
from pipeline.transform.school_proximity import classify_good_plus_schools
|
|
|
|
|
|
def _school(phase, oeif, ungraded, postcode="AA1 1AA"):
|
|
return {
|
|
"Postcode": postcode,
|
|
"Ofsted phase": phase,
|
|
"Latest OEIF overall effectiveness": oeif,
|
|
"Ungraded inspection overall outcome": ungraded,
|
|
}
|
|
|
|
|
|
def _classify(rows):
    """Classify ``rows`` and return the result as a set of pairs.

    ``rows`` is wrapped in a ``pl.DataFrame`` and passed to
    ``classify_good_plus_schools``; every record of the result contributes
    one ``(postcode, category)`` tuple. Returning a set lets callers compare
    against an expected set without caring about row order.
    """
    frame = pl.DataFrame(rows)
    classified = classify_good_plus_schools(frame)
    pairs = set()
    for record in classified.to_dicts():
        pairs.add((record["postcode"], record["category"]))
    return pairs
|
|
|
|
|
|
def test_legacy_oeif_grades_1_and_2_are_kept():
    """OEIF grades "1" and "2" are kept for both Primary and Secondary."""
    # Each case: (phase, OEIF grade, postcode, expected category).
    cases = [
        ("Primary", "1", "AA1 1AA", "outstanding_primary"),
        ("Primary", "2", "AA1 1AB", "good_primary"),
        ("Secondary", "1", "AA1 1AC", "outstanding_secondary"),
        ("Secondary", "2", "AA1 1AD", "good_secondary"),
    ]
    schools = [
        _school(phase, grade, None, postcode)
        for phase, grade, postcode, _category in cases
    ]
    expected = {(postcode, category) for _phase, _grade, postcode, category in cases}
    assert _classify(schools) == expected
|
|
|
|
|
|
def test_grades_3_and_4_are_excluded():
    """Primary schools graded "3" or "4" produce no classified rows."""
    schools = [_school("Primary", grade, None) for grade in ("3", "4")]
    assert _classify(schools) == set()
|
|
|
|
|
|
def test_ungraded_remains_good_is_recovered_when_no_graded_result():
    """With no usable OEIF grade, the ungraded outcome decides the category."""
    # Both a null OEIF and the literal "Not judged" fall back to the
    # ungraded outcome.
    null_oeif_good = _school("Primary", None, "School remains Good", "AA1 1AA")
    not_judged_outstanding = _school(
        "Secondary", "Not judged", "School remains Outstanding", "AA1 1AB"
    )
    # An "(Improving)" suffix still counts as good+.
    improving = _school(
        "Primary", None, "School remains Good (Improving) - S5 Next", "AA1 1AE"
    )

    expected = {
        ("AA1 1AA", "good_primary"),
        ("AA1 1AB", "outstanding_secondary"),
        ("AA1 1AE", "good_primary"),
    }
    assert _classify([null_oeif_good, not_judged_outstanding, improving]) == expected
|
|
|
|
|
|
def test_ungraded_concerns_are_not_good_plus():
    """Ungraded "(Concerns)" outcomes never yield a good+ classification."""
    # "(Concerns)" outcomes flag issues that warrant an earlier
    # re-inspection, so these schools must NOT be counted as good+.
    # Each case: (phase, ungraded outcome, postcode).
    cases = [
        ("Primary", "School remains Good (Concerns) - S5 Next", "AA1 1AC"),
        ("Secondary", "School remains Outstanding (Concerns) - S5 Next", "AA1 1AD"),
    ]
    schools = [
        _school(phase, None, outcome, postcode) for phase, outcome, postcode in cases
    ]
    assert _classify(schools) == set()
|
|
|
|
|
|
def test_ungraded_non_good_outcomes_are_excluded():
    """Ungraded outcomes other than "remains Good/Outstanding" are dropped."""
    # The last entry covers a school with neither a grade nor an outcome.
    outcomes = ("Some aspects not as strong", "Standards maintained", None)
    schools = [_school("Primary", None, outcome) for outcome in outcomes]
    assert _classify(schools) == set()
|
|
|
|
|
|
def test_genuine_grade_3_is_not_overridden_by_stale_remains_good():
    """A graded "3" wins over an ungraded "School remains Good" outcome."""
    # The ungraded "remains Good" must not promote a school whose actual
    # grade is 3.
    graded_three = _school("Primary", "3", "School remains Good")
    assert _classify([graded_three]) == set()
|
|
|
|
|
|
def test_non_primary_secondary_phases_excluded():
    """Phases other than Primary/Secondary are dropped even with grade 1 or 2."""
    # Each case: (phase, OEIF grade).
    cases = (("Nursery", "1"), ("Not applicable", "2"))
    schools = [_school(phase, grade, None) for phase, grade in cases]
    assert _classify(schools) == set()
|
|
|
|
|
|
def _aged_school(phase, oeif, low, high, postcode="AA1 1AA"):
|
|
return {
|
|
"Postcode": postcode,
|
|
"Ofsted phase": phase,
|
|
"Latest OEIF overall effectiveness": oeif,
|
|
"Ungraded inspection overall outcome": None,
|
|
"URN": 100000,
|
|
"Statutory lowest age": low,
|
|
"Statutory highest age": high,
|
|
}
|
|
|
|
|
|
def test_all_through_school_counts_toward_both_primary_and_secondary():
    """A grade-2 "Secondary" school aged 3-18 is classified under both phases."""
    # Ofsted labels an all-through school (ages 3-18) as "Secondary", yet it
    # also teaches primary-age children, so it has to appear in BOTH metrics.
    all_through = _aged_school("Secondary", "2", 3, 18, "AA1 1AA")
    expected = {
        ("AA1 1AA", "good_primary"),
        ("AA1 1AA", "good_secondary"),
    }
    assert _classify([all_through]) == expected
|
|
|
|
|
|
def test_age_ranges_assign_single_phase_for_standard_schools():
    """Age ranges 4-11 and 11-16 map to one phase; 9-13 maps to both."""
    primary_only = _aged_school("Primary", "1", 4, 11, "AA1 1AA")
    secondary_only = _aged_school("Secondary", "2", 11, 16, "AA1 1AB")
    # A middle school (ages 9-13) is expected under both phases.
    middle = _aged_school("Secondary", "1", 9, 13, "AA1 1AC")

    expected = {
        ("AA1 1AA", "outstanding_primary"),
        ("AA1 1AB", "good_secondary"),
        ("AA1 1AC", "outstanding_primary"),
        ("AA1 1AC", "outstanding_secondary"),
    }
    assert _classify([primary_only, secondary_only, middle]) == expected
|
|
|
|
|
|
def test_closed_schools_excluded_when_open_register_given():
    """Only schools whose URN is in ``open_urns`` appear in the result."""
    listed = _aged_school("Primary", "1", 4, 11, "AA1 1AA")
    unlisted = _aged_school("Secondary", "2", 11, 16, "AA1 1AB")
    listed["URN"] = 111
    unlisted["URN"] = 222

    frame = pl.DataFrame([listed, unlisted])
    classified = classify_good_plus_schools(frame, open_urns={111})
    pairs = set()
    for record in classified.to_dicts():
        pairs.add((record["postcode"], record["category"]))

    # URN 222 is absent from the open register, so that school is dropped.
    assert pairs == {("AA1 1AA", "outstanding_primary")}
|