don't crash
This commit is contained in:
parent
aab85fe32e
commit
d6d20ccd37
13 changed files with 2630 additions and 3924 deletions
|
|
@ -72,6 +72,18 @@ assert len(EXPECTED_BAND_NAMES) == len(AGE_BANDS), (
|
|||
"EXPECTED_BAND_NAMES and AGE_BANDS must stay aligned 1:1"
|
||||
)
|
||||
|
||||
# NOMIS occasionally publishes a band under an alternative wording that covers
# exactly the same ages as a canonical band (for instance "Aged 4 years and
# under" in place of "Aged 0 to 4 years"). Such known-equivalent labels are
# translated back to their canonical name BEFORE validation: a purely cosmetic
# relabel of an identical range then no longer blocks the build, while a real
# change of band boundaries still fails loudly. Only list a label here when
# its age range is identical to the canonical band it maps to.
BAND_NAME_ALIASES = {
    "Aged 4 years and under": "Aged 0 to 4 years",
}
# Every alias target has to be one of the canonical band names.
assert set(BAND_NAME_ALIASES.values()).issubset(EXPECTED_BAND_NAMES), (
    "BAND_NAME_ALIASES must map to canonical EXPECTED_BAND_NAMES"
)
|
||||
|
||||
|
||||
def compute_median_age(counts: list[int]) -> float:
|
||||
"""Compute median age from five-year band counts using linear interpolation."""
|
||||
|
|
@ -98,6 +110,15 @@ def _bands_to_median_table(pivoted: pl.DataFrame) -> pl.DataFrame:
|
|||
missing/extra/relabelled band would otherwise silently mis-align counts
|
||||
against the wrong AGE_BANDS lower bound, so we fail loudly instead.
|
||||
"""
|
||||
# Normalise known-equivalent NOMIS label variants to their canonical name
|
||||
# before validating (renaming onto an already-present canonical column would
|
||||
# collide, so polars raises loudly in that genuinely ambiguous case).
|
||||
rename_map = {
|
||||
c: BAND_NAME_ALIASES[c] for c in pivoted.columns if c in BAND_NAME_ALIASES
|
||||
}
|
||||
if rename_map:
|
||||
pivoted = pivoted.rename(rename_map)
|
||||
|
||||
# Validate the pivoted age-band columns against the canonical NOMIS set
|
||||
# BEFORE computing anything.
|
||||
band_cols = [c for c in pivoted.columns if c != "GEOGRAPHY_CODE"]
|
||||
|
|
|
|||
|
|
@ -266,8 +266,12 @@ def _download_tile(
|
|||
except (
|
||||
NoGeoTiffError,
|
||||
httpx.HTTPStatusError,
|
||||
httpx.TimeoutException,
|
||||
httpx.ConnectError,
|
||||
# TransportError is the base class of TimeoutException, ConnectError,
|
||||
# ReadError and ProtocolError — including RemoteProtocolError, raised
|
||||
# when the WCS server closes the connection mid-stream ("incomplete
|
||||
# chunked read"). All are transient; retry/split rather than letting
|
||||
# one flaky tile crash the whole raster download.
|
||||
httpx.TransportError,
|
||||
) as e:
|
||||
last_error = e
|
||||
if attempt < MAX_RETRIES:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,21 @@ def test_null_band_count_is_treated_as_zero_not_crash():
|
|||
assert table["median_age"][0] == pytest.approx(32.5)
|
||||
|
||||
|
||||
def test_equivalent_band_label_alias_is_accepted():
    """A known-equivalent label for the first band is normalised, not rejected.

    NOMIS relabelled the first band "Aged 4 years and under" (the same ages as
    0-4). It must be mapped to the canonical name and used as band 0. With all
    100 people in that band the median falls in the 0-4 range: 2.5.
    """
    alias_label = "Aged 4 years and under"
    counts_by_band = {name: [0] for name in EXPECTED_BAND_NAMES}
    # Swap the canonical first-band entry for its alias; pop() also fails
    # fast if the canonical name ever disappears from EXPECTED_BAND_NAMES.
    counts_by_band.pop("Aged 0 to 4 years")
    counts_by_band[alias_label] = [100]

    table = median_age._bands_to_median_table(_pivoted(counts_by_band))

    assert table.height == 1
    assert table["median_age"][0] == pytest.approx(2.5)
|
||||
|
||||
|
||||
def test_missing_band_raises_clear_error():
|
||||
counts_by_band = {name: [10] for name in EXPECTED_BAND_NAMES}
|
||||
del counts_by_band["Aged 85 years and over"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue