"""Tests for ``_ethnicity_percentages`` from ``pipeline.download.ethnicity``."""

import math

import polars as pl

from pipeline.download.ethnicity import _ethnicity_percentages

# Classification label carried by every fixture row in this module.
_ETHNICITY_TYPE = "ONS 2021 19+1"


def _census_row(code, ethnicity, population, percentage):
    """Build one input record with the columns these tests feed to the function."""
    return {
        "Geography_code": code,
        "Ethnicity_type": _ETHNICITY_TYPE,
        "Ethnicity": ethnicity,
        "Ethnic Population": population,
        "Value1": percentage,
    }


def _assert_single_row_close(frame, expected):
    """Assert *frame* has exactly one row whose *expected* columns match.

    Values are compared with a small absolute tolerance rather than ``==``:
    percentages are computed floats, and e.g. ``0.55 * 100`` is
    ``55.00000000000001``, so exact equality would depend on the order of
    arithmetic inside the implementation.
    """
    records = frame.select(list(expected)).to_dicts()
    assert len(records) == 1, records
    for column, want in expected.items():
        got = records[0][column]
        assert math.isclose(got, want, abs_tol=1e-9), f"{column}: {got} != {want}"


def test_ethnicity_percentages_recombines_predecessor_lads_by_population():
    """Predecessor areas are merged into E06000063 weighted by head-count.

    The two areas deliberately have different totals (100 vs 400). With equal
    totals the population-weighted result coincides with the plain mean of the
    per-area percentages, so the test could not tell the two apart. Here the
    weighted answer is 120/500 = 24% and 380/500 = 76%, whereas an unweighted
    mean of the per-area percentages would give 45% and 55%.
    """
    rows = []
    for code, white, indian in [
        ("E07000026", 80, 20),
        ("E07000028", 40, 360),
    ]:
        total = white + indian
        rows.append(_census_row(code, "White British", white, white / total * 100))
        rows.append(_census_row(code, "Indian", indian, indian / total * 100))

    result = _ethnicity_percentages(pl.DataFrame(rows))

    cumberland = result.filter(pl.col("Geography_code") == "E06000063")
    _assert_single_row_close(
        cumberland,
        {"% White": 24.0, "% South Asian": 76.0},
    )


def test_ethnicity_routes_any_other_asian_to_east_se_asian():
    """'Any Other Asian Background' and 'Chinese' both fold into
    '% East/SE Asian' (not '% South Asian'), fixing the East/SE Asian
    undercount."""
    rows = [
        # Value1 is left at 0.0, so the expected 50/50 split below can only
        # come from the "Ethnic Population" counts.
        _census_row("E06000001", ethnicity, pop, 0.0)
        for ethnicity, pop in [
            ("Chinese", 30),
            ("Any Other Asian Background", 20),
            ("Indian", 50),
        ]
    ]

    result = _ethnicity_percentages(pl.DataFrame(rows))

    assert "% East/SE Asian" in result.columns
    assert "% East Asian" not in result.columns

    area = result.filter(pl.col("Geography_code") == "E06000001")
    _assert_single_row_close(
        area,
        {"% East/SE Asian": 50.0, "% South Asian": 50.0},
    )