idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -1,6 +1,10 @@
|
|||
import polars as pl
|
||||
|
||||
from pipeline.transform.transform_poi import transform_grocery_retail_points
|
||||
from pipeline.transform.transform_poi import (
|
||||
_load_ofsted_ratings,
|
||||
_school_icon_category_expr,
|
||||
transform_grocery_retail_points,
|
||||
)
|
||||
|
||||
|
||||
def test_transform_grocery_retail_points_outputs_chain_categories():
|
||||
|
|
@ -112,6 +116,33 @@ def test_transform_grocery_retail_points_merges_cooperative_societies():
|
|||
]
|
||||
|
||||
|
||||
def test_transform_grocery_retail_points_pools_small_coop_societies_before_cutoff():
    """Regional Co-op societies are pooled under one brand before the size cutoff.

    The fixture holds one store per society, so each society on its own has
    fewer than five in-England stores. Only after normalising every retailer
    to the shared "Co-op" brand do they clear MIN_GROCERY_CHAIN_LOCATIONS
    together; every input row must therefore survive with that category.
    """
    societies = [
        "Central England Co-operative",
        "Lincolnshire Co-operative",
        "The Southern Co-operative",
        "Midcounties Co-operative",
        "Heart of England Co-operative",
    ]
    store_count = len(societies)
    store_ids = list(range(1, store_count + 1))

    # Every store shares the same fascia and coordinates; only the retailer
    # (society) name and the store name differ between rows.
    raw = pl.DataFrame(
        {
            "id": store_ids,
            "retailer": societies,
            "fascia": ["The Co-operative Food"] * store_count,
            "store_name": [f"Co-op Test {i}" for i in store_ids],
            "long_wgs": [-0.141] * store_count,
            "lat_wgs": [51.515] * store_count,
        }
    )

    pois = transform_grocery_retail_points(raw)

    assert pois.height == store_count
    assert pois["category"].unique().to_list() == ["Co-op"]
|
||||
|
||||
|
||||
def test_transform_grocery_retail_points_accepts_base_fascias():
|
||||
raw = pl.DataFrame(
|
||||
{
|
||||
|
|
@ -177,3 +208,87 @@ def test_transform_grocery_retail_points_includes_unmapped_chains_with_five_loca
|
|||
{"id": "glx-4", "category": "Tian Tian", "icon_category": "Tian Tian"},
|
||||
{"id": "glx-5", "category": "Tian Tian", "icon_category": "Tian Tian"},
|
||||
]
|
||||
|
||||
|
||||
def test_load_ofsted_ratings_falls_back_to_ungraded_outcome(tmp_path):
    """Ungraded outcomes fill in a rating only when no graded result is usable.

    Each case row is (URN, graded value, ungraded outcome, expected rating):

    * URNs 1-4: graded results map straight through.
    * URNs 5-6: no usable graded grade (null / "Not judged") but a
      good/outstanding ungraded outcome, including one with a "(Concerns)"
      suffix.
    * URN 7: genuinely "Not judged".
    * URN 8: a real grade 3 must NOT be overridden by an ungraded outcome.
    """
    cases = [
        (1, "1", None, "Outstanding"),
        (2, "2", None, "Good"),
        (3, "3", None, "Requires improvement"),
        (4, "4", None, "Inadequate"),
        (5, None, "School remains Outstanding", "Outstanding"),
        (6, "Not judged", "School remains Good (Concerns)", "Good"),
        (7, "Not judged", None, "Not judged"),
        (8, "3", "School remains Outstanding", "Requires improvement"),
    ]

    # Write the fixture to disk so the loader reads it from a parquet file.
    ofsted_path = tmp_path / "ofsted.parquet"
    fixture = pl.DataFrame(
        {
            "URN": [urn for urn, _, _, _ in cases],
            "Latest OEIF overall effectiveness": [
                graded for _, graded, _, _ in cases
            ],
            "Ungraded inspection overall outcome": [
                ungraded for _, _, ungraded, _ in cases
            ],
        }
    )
    fixture.write_parquet(ofsted_path)

    loaded = _load_ofsted_ratings(ofsted_path).collect()
    ratings = loaded.sort("urn").to_dicts()

    expected = [
        {"urn": urn, "ofsted_rating": rating} for urn, _, _, rating in cases
    ]
    assert ratings == expected
|
||||
|
||||
|
||||
def test_school_icon_category_handles_one_sided_age_ranges():
    """One-sided and two-sided age ranges classify without phase/type_group.

    gias._format_age_range emits "up to {high}", "{low}+" and "{low}–{high}".
    All three shapes must classify from the age range alone; only the row
    with a null age range (and null phase) falls through to "School".
    """
    # (age_range, expected icon category)
    cases = [
        ("up to 5", "Nursery school"),
        ("16+", "Sixth form"),
        ("3–18", "All-through school"),
        ("4–11", "Primary school"),
        (None, "School"),
    ]
    row_count = len(cases)

    # Production reads these from a scanned parquet as String; an all-null
    # Python list would otherwise infer the Null dtype and break .str ops.
    string_columns = {
        "phase": pl.String,
        "type_group": pl.String,
        "age_range": pl.String,
    }
    df = pl.DataFrame(
        {
            "phase": [None] * row_count,
            "type_group": [None] * row_count,
            "age_range": [age_range for age_range, _ in cases],
        },
        schema_overrides=string_columns,
    )

    classified = df.select(_school_icon_category_expr().alias("category"))
    categories = classified["category"].to_list()

    assert categories == [expected for _, expected in cases]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue