perfect-postcode/pipeline/download/test_noise.py
2026-06-02 20:14:32 +01:00

363 lines
12 KiB
Python

import httpx
import numpy as np
import pytest
import rasterio
from rasterio.io import MemoryFile
from rasterio.transform import from_origin
from pipeline.download import noise
def _tiny_geotiff_bytes() -> bytes:
    """Return the raw bytes of an in-memory 1x1, single-band GeoTIFF.

    The only pixel holds the uint8 value 55 and the raster is tagged with
    EPSG:27700. Tests return these bytes from a stubbed tile fetch.
    """
    pixels = np.array([[55]], dtype=np.uint8)
    profile = {
        "driver": "GTiff",
        "height": pixels.shape[0],
        "width": pixels.shape[1],
        "count": 1,
        "dtype": pixels.dtype,
        "crs": "EPSG:27700",
        "transform": from_origin(0, 1, 1, 1),
    }
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:
            dataset.write(pixels, 1)
        # Copy the buffer after the dataset is closed but while the
        # MemoryFile itself is still open.
        return bytes(memfile.getbuffer())
def test_download_tile_splits_after_retries(monkeypatch, tmp_path):
    """A 100x100 tile that times out is split into four 50x50 tiles that succeed."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 50)
    tile_bytes = _tiny_geotiff_bytes()

    def fake_fetch_tile_bytes(
        wcs_base,
        coverage_id,
        min_e,
        min_n,
        max_e,
        max_n,
        wcs_version="1.0.0",
    ):
        # Only requests no larger than 50 units on each axis succeed.
        small_enough = max_e - min_e <= 50 and max_n - min_n <= 50
        if small_enough:
            return tile_bytes
        raise httpx.TimeoutException("too large")

    monkeypatch.setattr(noise, "_fetch_tile_bytes", fake_fetch_tile_bytes)

    paths, failures = noise._download_tile(
        "base", "coverage", 0, 0, 100, 100, tmp_path
    )

    expected_names = [
        "tile_0_0_50_50.tif",
        "tile_0_50_50_100.tif",
        "tile_50_0_100_50.tif",
        "tile_50_50_100_100.tif",
    ]
    assert failures == []
    assert len(paths) == 4
    assert sorted(path.name for path in paths) == expected_names
def test_download_tile_reports_unsplittable_failure(monkeypatch, tmp_path):
    """A failing tile already at MIN_TILE_SIZE is reported as a failure."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 100)

    def always_offline(*_args, **_kwargs):
        raise httpx.ConnectError("offline")

    monkeypatch.setattr(noise, "_fetch_tile_bytes", always_offline)

    bbox = (0, 0, 100, 100)
    paths, failures = noise._download_tile("base", "coverage", *bbox, tmp_path)

    assert paths == []
    assert failures == [bbox]
def test_download_tile_treats_non_tiff_response_as_failure(monkeypatch, tmp_path):
    """A NoGeoTiffError from the fetch is reported as a tile failure."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 100)

    def always_non_tiff(*_args, **_kwargs):
        raise noise.NoGeoTiffError("xml exception")

    monkeypatch.setattr(noise, "_fetch_tile_bytes", always_non_tiff)

    bbox = (0, 0, 100, 100)
    paths, failures = noise._download_tile("base", "coverage", *bbox, tmp_path)

    assert paths == []
    assert failures == [bbox]
def test_download_raster_tolerates_missing_tiles_when_allowed(monkeypatch, tmp_path):
    """With allow_missing_tiles=True a failed tile yields an empty path list."""
    # Shrink the grid so that it is covered by a single 100x100 tile.
    grid_overrides = {
        "BNG_MIN_E": 0,
        "BNG_MAX_E": 100,
        "BNG_MIN_N": 0,
        "BNG_MAX_N": 100,
        "TILE_SIZE": 100,
    }
    for attr, value in grid_overrides.items():
        monkeypatch.setattr(noise, attr, value)

    def failing_download_tile(*_args, **_kwargs):
        # No paths written; the whole tile is reported as failed.
        return [], [(0, 0, 100, 100)]

    monkeypatch.setattr(noise, "_download_tile", failing_download_tile)

    paths = noise.download_raster(
        tmp_path,
        "base",
        "coverage",
        "Airport",
        allow_missing_tiles=True,
    )

    assert paths == []
def test_download_raster_raises_on_missing_strict_tiles(monkeypatch, tmp_path):
    """Without allow_missing_tiles a failed tile raises a labelled RuntimeError."""
    # Shrink the grid so that it is covered by a single 100x100 tile.
    grid_overrides = {
        "BNG_MIN_E": 0,
        "BNG_MAX_E": 100,
        "BNG_MIN_N": 0,
        "BNG_MAX_N": 100,
        "TILE_SIZE": 100,
    }
    for attr, value in grid_overrides.items():
        monkeypatch.setattr(noise, attr, value)

    def failing_download_tile(*_args, **_kwargs):
        # No paths written; the whole tile is reported as failed.
        return [], [(0, 0, 100, 100)]

    monkeypatch.setattr(noise, "_download_tile", failing_download_tile)

    with pytest.raises(RuntimeError, match=r"\[Road\] Failed to download"):
        noise.download_raster(tmp_path, "base", "coverage", "Road")
def test_generate_tiles_neighbours_overlap_by_radius():
    """Adjacent generated tiles overlap by at least POSTCODE_NOISE_RADIUS_M.

    Tiles are keyed by their (min_e, min_n) origin. For every tile that has a
    neighbour one ``tile_size`` to the right or above, the tile's far edge
    must reach at least ``overlap`` past the neighbour's origin.
    """
    tile_size = 20_000
    overlap = noise.POSTCODE_NOISE_RADIUS_M
    tiles = noise._generate_tiles(
        0, 60_000, 0, 60_000, tile_size, overlap, tile_size
    )
    by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}

    # Horizontally adjacent tiles must overlap by >= overlap.
    horizontal_pairs = 0
    for (min_e, min_n), (max_e, _max_n) in by_origin.items():
        right_origin = (min_e + tile_size, min_n)
        if right_origin in by_origin:
            horizontal_pairs += 1
            assert max_e - right_origin[0] >= overlap

    # Vertically adjacent tiles must overlap by >= overlap.
    vertical_pairs = 0
    for (min_e, min_n), (_max_e, max_n) in by_origin.items():
        up_origin = (min_e, min_n + tile_size)
        if up_origin in by_origin:
            vertical_pairs += 1
            assert max_n - up_origin[1] >= overlap

    # Guard against a vacuous pass: the overlap assertions above only run when
    # a neighbour is found, so require that at least one pair was checked in
    # each direction.
    assert horizontal_pairs > 0
    assert vertical_pairs > 0
def test_generate_tiles_clamps_to_grid_extent():
    """Tiles generated at the top-right grid corner never exceed BNG_MAX_E/N."""
    tile_size = 20_000
    overlap = noise.POSTCODE_NOISE_RADIUS_M
    tiles = noise._generate_tiles(
        noise.BNG_MAX_E - tile_size,
        noise.BNG_MAX_E,
        noise.BNG_MAX_N - tile_size,
        noise.BNG_MAX_N,
        tile_size,
        overlap,
        tile_size,
    )

    # Guard against a vacuous pass: the clamping assertions below sit inside a
    # loop, so an empty result would otherwise succeed without checking anything.
    assert tiles

    # The final (top-right) tile cannot extend past the England extent even
    # though the overlap would otherwise push it beyond.
    for _min_e, _min_n, max_e, max_n in tiles:
        assert max_e <= noise.BNG_MAX_E
        assert max_n <= noise.BNG_MAX_N
def _write_geotiff(path, data, left, top, resolution, nodata):
    """Write ``data`` to ``path`` as a single-band EPSG:27700 GeoTIFF.

    ``left`` and ``top`` position the raster's upper-left corner, ``resolution``
    is used as the pixel size on both axes, and ``nodata`` is stored as the
    dataset's nodata value.
    """
    profile = {
        "driver": "GTiff",
        "height": data.shape[0],
        "width": data.shape[1],
        "count": 1,
        "dtype": data.dtype,
        "crs": "EPSG:27700",
        "transform": from_origin(left, top, resolution, resolution),
        "nodata": nodata,
    }
    with rasterio.open(path, "w", **profile) as dataset:
        dataset.write(data, 1)
def test_sample_noise_recovers_value_across_overlapping_seam(monkeypatch, tmp_path):
    """A loud cell just across a tile seam is still found for a nearby postcode."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 50)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # Two download tiles share a vertical seam at easting=100. _generate_tiles
    # decides their real footprints: with the overlap fix the LEFT tile extends
    # past the seam by POSTCODE_NOISE_RADIUS_M and thus covers a loud cell that
    # physically sits just across the seam.
    tile_size = 100
    overlap = noise.POSTCODE_NOISE_RADIUS_M
    footprints = {
        (min_e, min_n): (max_e, max_n)
        for min_e, min_n, max_e, max_n in noise._generate_tiles(
            0, 200, 0, 100, tile_size, overlap, tile_size
        )
    }

    left_min_e, left_min_n = 0, 0
    left_max_e, left_max_n = footprints[(left_min_e, left_min_n)]
    # Overlap fix is what makes the left tile reach across the seam.
    assert left_max_e > 100

    res = noise.RESOLUTION

    def row_index(northing, min_n, n_rows):
        # Row 0 is the northernmost row, so count up from the bottom edge.
        return n_rows - 1 - int((northing - min_n) // res)

    # The loud 70 dB cell centre is at easting 105 (just across the seam) and
    # the postcode point is at easting 75 in the left tile, within 50m of it.
    left_cols = int((left_max_e - left_min_e) // res)
    left_rows = int((left_max_n - left_min_n) // res)
    left_data = np.zeros((left_rows, left_cols), dtype=np.float32)
    loud_row = row_index(25, left_min_n, left_rows)  # northing ~25
    loud_col = int((105 - left_min_e) // res)  # easting ~105
    left_data[loud_row, loud_col] = 70.0
    left_path = tmp_path / "left.tif"
    _write_geotiff(left_path, left_data, left_min_e, left_max_n, res, nodata=0)

    # The right tile holds the same loud cell but the postcode point is NOT
    # inside it, so without overlap the value would be lost for that point.
    right_min_e, right_min_n = 100, 0
    right_max_e, right_max_n = footprints[(right_min_e, right_min_n)]
    right_cols = int((right_max_e - right_min_e) // res)
    right_rows = int((right_max_n - right_min_n) // res)
    right_data = np.zeros((right_rows, right_cols), dtype=np.float32)
    right_data[row_index(25, right_min_n, right_rows), 0] = 70.0
    right_path = tmp_path / "right.tif"
    _write_geotiff(right_path, right_data, right_min_e, right_max_n, res, nodata=0)

    result = noise.sample_noise_at_postcodes(
        [left_path, right_path],
        easting=np.array([75.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [70.0]
def test_sample_noise_distinguishes_nodata_from_in_coverage_quiet(
    monkeypatch, tmp_path
):
    """The nodata sentinel maps to null while in-coverage 0.0 maps to the floor."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # Defra encodes TRUE nodata as the -96.0 sentinel; genuinely quiet ground
    # below the lowest reporting band is 0.0. With a 0m radius each postcode
    # reads exactly one cell, so we can pin behaviour per cell:
    #   -96.0 sentinel  -> null ("we don't know")
    #   0.0 in-coverage -> NOISE_QUIET_FLOOR_DB ("we know it's quiet")
    #   65.0            -> 65.0 (a real modelled reading)
    single_row = np.array([[-96.0, 0.0, 65.0]], dtype=np.float32)
    tile_path = tmp_path / "noise.tif"
    _write_geotiff(tile_path, single_row, 0, 10, 10, nodata=-96.0)

    # Cell centres at easting 5 (nodata), 15 (quiet 0.0), 25 (loud 65).
    eastings = np.array([5.0, 15.0, 25.0])
    northings = np.array([5.0, 5.0, 5.0])
    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=eastings,
        northing=northings,
        label="Road",
        col_name="road_noise_lden_db",
    )

    expected = [None, float(noise.NOISE_QUIET_FLOOR_DB), 65.0]
    assert result.to_list() == expected
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(monkeypatch, tmp_path):
    """Genuine readings pass through unchanged; only 0.0 is raised to the floor."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # The lowest Defra reporting band is 40.0 dB; genuine readings populate
    # [40, ~80]. A genuine in-coverage reading at or just above the floor must be
    # PRESERVED, not clamped UP to the floor — only true-quiet 0.0 is floored. A
    # quiet floor set too high (e.g. 45) would inflate the ~35% of real 40-44.99
    # dB readings; this pins that they survive unchanged.
    floor = float(noise.NOISE_QUIET_FLOOR_DB)
    single_row = np.array([[42.0, floor, 0.0]], dtype=np.float32)
    tile_path = tmp_path / "noise.tif"
    _write_geotiff(tile_path, single_row, 0, 10, 10, nodata=-96.0)

    # Cell centres at easting 5 (42 dB), 15 (floor), 25 (quiet 0.0).
    eastings = np.array([5.0, 15.0, 25.0])
    northings = np.array([5.0, 5.0, 5.0])
    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=eastings,
        northing=northings,
        label="Road",
        col_name="road_noise_lden_db",
    )

    # 42 preserved (NOT raised to the floor), floor preserved, 0.0 -> floor.
    assert result.to_list() == [42.0, floor, floor]
    # The floor must sit at/below the lowest genuine reading so nothing inflates.
    assert floor <= 42.0
def test_sample_noise_nodata_window_stays_null(monkeypatch, tmp_path):
    """A postcode whose whole sampling window is nodata yields null."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # A postcode whose entire 3x3 max-window is the -96.0 sentinel must remain
    # null: no in-coverage cell was read, so "quiet" must NOT be inferred.
    grid = np.full((5, 5), -96.0, dtype=np.float32)
    grid[4, 4] = 70.0  # one loud cell, far from the nodata corner
    tile_path = tmp_path / "noise.tif"
    _write_geotiff(tile_path, grid, 0, 50, 10, nodata=-96.0)

    # Top-left point: its 3x3 window is cells (rows 0-1, cols 0-1) = all -96.
    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=np.array([5.0]),
        northing=np.array([45.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [None]
def test_sample_noise_at_postcodes_uses_local_maximum(monkeypatch, tmp_path):
    """Sampling returns the loudest cell near the postcode, not just its own cell.

    The postcode at (25, 25) falls in the cell holding 55, while a diagonal
    neighbour holds 70; the expected sampled value is 70.
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)
    tile_path = tmp_path / "noise.tif"
    data = np.array(
        [
            [0, 0, 0, 0, 0],
            [0, 70, 0, 0, 0],
            [0, 0, 55, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
        ],
        dtype=np.float32,
    )
    # Use the shared writer instead of repeating the rasterio.open profile
    # inline: upper-left corner at (0, 50), 10-unit cells, nodata=0.
    _write_geotiff(tile_path, data, 0, 50, 10, nodata=0)

    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=np.array([25.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [70.0]