# perfect-postcode/pipeline/download/test_ethnicity.py

import math

import polars as pl

from pipeline.download.ethnicity import _ethnicity_percentages


def test_ethnicity_percentages_recombines_predecessor_lads_by_population():
    """Predecessor LAD rows are merged into E06000063 weighted by head-count.

    The two predecessor districts are given deliberately *unequal* totals
    (100 vs 900 people) with very different ethnic mixes (80/20 vs 10/90).
    A population-weighted merge yields 170/1000 = 17% White and
    830/1000 = 83% South Asian, whereas an unweighted mean of the two
    ``Value1`` percentages would yield 45% / 55%.  With equal totals those
    two strategies coincide, so the test would not be able to tell them
    apart.
    """
    rows = []
    for code, white, indian in [
        ("E07000026", 80, 20),  # small district, mostly White British
        ("E07000028", 90, 810),  # 9x larger district, mostly Indian
    ]:
        total = white + indian
        rows.extend(
            [
                {
                    "Geography_code": code,
                    "Ethnicity_type": "ONS 2021 19+1",
                    "Ethnicity": "White British",
                    "Ethnic Population": white,
                    "Value1": white / total * 100,
                },
                {
                    "Geography_code": code,
                    "Ethnicity_type": "ONS 2021 19+1",
                    "Ethnicity": "Indian",
                    "Ethnic Population": indian,
                    "Value1": indian / total * 100,
                },
            ]
        )

    result = _ethnicity_percentages(pl.DataFrame(rows))

    cumberland = result.filter(pl.col("Geography_code") == "E06000063")
    records = cumberland.select("% White", "% South Asian").to_dicts()

    # Exactly one merged row must exist for the successor authority.
    assert len(records) == 1, records
    merged = records[0]
    # Compare with a tolerance: the percentages are computed floats, and
    # their last bit depends on the order of the arithmetic used upstream.
    assert math.isclose(merged["% White"], 17.0), merged
    assert math.isclose(merged["% South Asian"], 83.0), merged


def test_ethnicity_routes_any_other_asian_to_east_se_asian():
    """Check which output column each Asian sub-group is counted under.

    'Chinese' (30) and 'Any Other Asian Background' (20) are expected to be
    summed into '% East/SE Asian', leaving 'Indian' (50) as the only
    contributor to '% South Asian'.  The older '% East Asian' column name
    must not appear in the output.  This guards the fix for the East/SE
    Asian undercount.
    """
    geography = "E06000001"
    head_counts = {
        "Chinese": 30,
        "Any Other Asian Background": 20,
        "Indian": 50,
    }
    frame = pl.DataFrame(
        [
            {
                "Geography_code": geography,
                "Ethnicity_type": "ONS 2021 19+1",
                "Ethnicity": label,
                "Ethnic Population": count,
                "Value1": 0.0,
            }
            for label, count in head_counts.items()
        ]
    )

    result = _ethnicity_percentages(frame)
    area = result.filter(pl.col("Geography_code") == geography)

    # Column naming: only the combined East/SE bucket should be present.
    output_columns = result.columns
    assert "% East/SE Asian" in output_columns
    assert "% East Asian" not in output_columns

    expected = [{"% East/SE Asian": 50.0, "% South Asian": 50.0}]
    observed = area.select("% East/SE Asian", "% South Asian").to_dicts()
    assert observed == expected