393 lines
14 KiB
Python
393 lines
14 KiB
Python
import httpx
|
|
import numpy as np
|
|
import pytest
|
|
import rasterio
|
|
from rasterio.io import MemoryFile
|
|
from rasterio.transform import from_origin
|
|
|
|
from pipeline.download import noise
|
|
|
|
|
|
def _tiny_geotiff_bytes() -> bytes:
    """Return the raw bytes of an in-memory 1x1 single-band GeoTIFF.

    The only pixel holds the uint8 value 55. The raster is tagged EPSG:27700
    and uses ``from_origin(0, 1, 1, 1)`` as its affine transform.
    """
    pixel = np.array([[55]], dtype=np.uint8)
    profile = {
        "driver": "GTiff",
        "height": pixel.shape[0],
        "width": pixel.shape[1],
        "count": 1,
        "dtype": pixel.dtype,
        "crs": "EPSG:27700",
        "transform": from_origin(0, 1, 1, 1),
    }
    with MemoryFile() as memfile:
        with memfile.open(**profile) as dataset:
            dataset.write(pixel, 1)
        # Copy the buffer out once the dataset has been closed but while the
        # memory file itself is still open.
        return bytes(memfile.getbuffer())
|
|
|
|
|
|
def test_download_tile_splits_after_retries(monkeypatch, tmp_path):
    """A 100x100 request that times out must come back as four 50x50 tiles.

    The stubbed fetch raises a timeout for any request wider or taller than
    50 units and returns a tiny valid GeoTIFF otherwise.
    """
    payload = _tiny_geotiff_bytes()

    def fake_fetch_tile_bytes(
        wcs_base,
        coverage_id,
        min_e,
        min_n,
        max_e,
        max_n,
        wcs_version="1.0.0",
    ):
        small_enough = max_e - min_e <= 50 and max_n - min_n <= 50
        if small_enough:
            return payload
        raise httpx.TimeoutException("too large")

    overrides = {
        "MAX_RETRIES": 1,
        "MIN_TILE_SIZE": 50,
        "_fetch_tile_bytes": fake_fetch_tile_bytes,
    }
    for attr, value in overrides.items():
        monkeypatch.setattr(noise, attr, value)

    written, failed = noise._download_tile(
        "base", "coverage", 0, 0, 100, 100, tmp_path
    )

    expected_names = [
        "tile_0_0_50_50.tif",
        "tile_0_50_50_100.tif",
        "tile_50_0_100_50.tif",
        "tile_50_50_100_100.tif",
    ]
    assert failed == []
    assert len(written) == 4
    assert sorted(tile.name for tile in written) == expected_names
|
|
|
|
|
|
def test_download_tile_reports_unsplittable_failure(monkeypatch, tmp_path):
    """A tile that always fails to fetch is reported as a failure, not a path.

    MIN_TILE_SIZE is set to the tile's own size (100); the test expects the
    original bounding box back in the failure list and no written paths.
    """

    def always_offline(*_args, **_kwargs):
        raise httpx.ConnectError("offline")

    for attr, value in (
        ("MAX_RETRIES", 1),
        ("MIN_TILE_SIZE", 100),
        ("_fetch_tile_bytes", always_offline),
    ):
        monkeypatch.setattr(noise, attr, value)

    written, failed = noise._download_tile(
        "base", "coverage", 0, 0, 100, 100, tmp_path
    )

    assert written == []
    assert failed == [(0, 0, 100, 100)]
|
|
|
|
|
|
def test_download_tile_treats_non_tiff_response_as_failure(monkeypatch, tmp_path):
    """A fetch that raises NoGeoTiffError must surface as a failed tile.

    The stub raises ``noise.NoGeoTiffError`` on every call; the test expects
    no written paths and the requested bounding box in the failure list.
    """

    def always_not_a_tiff(*_args, **_kwargs):
        raise noise.NoGeoTiffError("xml exception")

    for attr, value in (
        ("MAX_RETRIES", 1),
        ("MIN_TILE_SIZE", 100),
        ("_fetch_tile_bytes", always_not_a_tiff),
    ):
        monkeypatch.setattr(noise, attr, value)

    written, failed = noise._download_tile(
        "base", "coverage", 0, 0, 100, 100, tmp_path
    )

    assert written == []
    assert failed == [(0, 0, 100, 100)]
|
|
|
|
|
|
def test_download_raster_tolerates_missing_tiles_when_allowed(monkeypatch, tmp_path):
    """With allow_missing_tiles=True a failed tile yields an empty path list.

    The grid constants are shrunk to 0..100 with TILE_SIZE 100, and
    ``_download_tile`` is stubbed to report that tile as failed.
    """
    grid_constants = {
        "BNG_MIN_E": 0,
        "BNG_MAX_E": 100,
        "BNG_MIN_N": 0,
        "BNG_MAX_N": 100,
        "TILE_SIZE": 100,
    }
    for attr, value in grid_constants.items():
        monkeypatch.setattr(noise, attr, value)

    # Every tile download "fails": no paths, one failed bounding box.
    monkeypatch.setattr(
        noise,
        "_download_tile",
        lambda *args, **kwargs: ([], [(0, 0, 100, 100)]),
    )

    downloaded = noise.download_raster(
        tmp_path, "base", "coverage", "Airport", allow_missing_tiles=True
    )

    assert downloaded == []
|
|
|
|
|
|
def test_download_raster_raises_on_missing_strict_tiles(monkeypatch, tmp_path):
    """Without allow_missing_tiles, a failed tile must raise RuntimeError.

    The grid constants are shrunk to 0..100 with TILE_SIZE 100, and
    ``_download_tile`` is stubbed to report that tile as failed. The error
    message is expected to carry the "[Road]" label.
    """
    grid_constants = {
        "BNG_MIN_E": 0,
        "BNG_MAX_E": 100,
        "BNG_MIN_N": 0,
        "BNG_MAX_N": 100,
        "TILE_SIZE": 100,
    }
    for attr, value in grid_constants.items():
        monkeypatch.setattr(noise, attr, value)

    # Every tile download "fails": no paths, one failed bounding box.
    monkeypatch.setattr(
        noise,
        "_download_tile",
        lambda *args, **kwargs: ([], [(0, 0, 100, 100)]),
    )

    expect_failure = pytest.raises(
        RuntimeError, match=r"\[Road\] Failed to download"
    )
    with expect_failure:
        noise.download_raster(tmp_path, "base", "coverage", "Road")
|
|
|
|
|
|
def test_generate_tiles_neighbours_overlap_by_radius():
    """Adjacent tiles from ``_generate_tiles`` must overlap by at least `overlap`.

    Both axes are checked, and each axis must actually contain at least one
    adjacent pair so that neither check can pass without comparing anything.
    """
    # Use an explicit non-zero overlap so the assertion verifies a real positive
    # overlap. The production radius is 0 (point sampling), which would make this
    # a vacuous ">= 0" check; this keeps the seam-safety guard meaningful for any
    # future non-zero sampling radius.
    tile_size = 20_000
    overlap = 50
    tiles = noise._generate_tiles(0, 60_000, 0, 60_000, tile_size, overlap, tile_size)

    by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}

    saw_horizontal_overlap = False
    saw_vertical_overlap = False
    for (min_e, min_n), (max_e, max_n) in by_origin.items():
        # Horizontally adjacent tiles must overlap by >= overlap.
        right_origin = (min_e + tile_size, min_n)
        if right_origin in by_origin:
            assert max_e - right_origin[0] >= overlap
            saw_horizontal_overlap = True

        # Vertically adjacent tiles must overlap by >= overlap.
        up_origin = (min_e, min_n + tile_size)
        if up_origin in by_origin:
            assert max_n - up_origin[1] >= overlap
            saw_vertical_overlap = True

    # The fixture actually has adjacent tiles on both axes; without these
    # guards a grid with no neighbours would pass the loop untouched.
    assert saw_horizontal_overlap
    assert saw_vertical_overlap
|
|
|
|
|
|
def test_generate_tiles_clamps_to_grid_extent():
    """Tiles at the top-right corner must not extend past BNG_MAX_E / BNG_MAX_N."""
    tile_size = 20_000
    # POSTCODE_NOISE_RADIUS_M can be 0 (another test in this module asserts
    # that as the default). With a zero overlap no tile is ever pushed past the
    # extent, so the clamp would never be exercised. Force a positive overlap
    # while still honouring a larger configured radius.
    overlap = max(noise.POSTCODE_NOISE_RADIUS_M, 50)
    tiles = list(
        noise._generate_tiles(
            noise.BNG_MAX_E - tile_size,
            noise.BNG_MAX_E,
            noise.BNG_MAX_N - tile_size,
            noise.BNG_MAX_N,
            tile_size,
            overlap,
            tile_size,
        )
    )
    # An empty result would make the loop below pass without checking anything.
    assert tiles
    # The final (top-right) tile cannot extend past the England extent even
    # though the overlap would otherwise push it beyond.
    for _min_e, _min_n, max_e, max_n in tiles:
        assert max_e <= noise.BNG_MAX_E
        assert max_n <= noise.BNG_MAX_N
|
|
|
|
|
|
def _write_geotiff(path, data, left, top, resolution, nodata):
    """Write a 2-D array to `path` as a single-band EPSG:27700 GeoTIFF.

    `left` and `top` are passed to ``from_origin`` as the raster origin and
    `resolution` is used for both pixel dimensions. `nodata` is stored as the
    dataset's nodata value.
    """
    n_rows, n_cols = data.shape[:2]
    profile = {
        "driver": "GTiff",
        "height": n_rows,
        "width": n_cols,
        "count": 1,
        "dtype": data.dtype,
        "crs": "EPSG:27700",
        "transform": from_origin(left, top, resolution, resolution),
        "nodata": nodata,
    }
    with rasterio.open(path, "w", **profile) as dataset:
        dataset.write(data, 1)
|
|
|
|
|
|
def test_sample_noise_recovers_value_across_overlapping_seam(monkeypatch, tmp_path):
    """A loud cell just across a tile seam must still be found for a nearby point.

    Two tiles share a vertical seam at easting 100. The tile footprints come
    from ``_generate_tiles`` with an overlap equal to the (patched) 50 m
    sampling radius, so the left tile is expected to reach past the seam.
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 50)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    seam_tile_size = 100
    radius = noise.POSTCODE_NOISE_RADIUS_M
    footprints = {
        (west, south): (east, north)
        for west, south, east, north in noise._generate_tiles(
            0, 200, 0, 100, seam_tile_size, radius, seam_tile_size
        )
    }

    # --- Left tile -------------------------------------------------------
    l_west, l_south = 0, 0
    l_east, l_north = footprints[(l_west, l_south)]
    # The overlap is what makes the left tile reach across the seam.
    assert l_east > 100

    cell = noise.RESOLUTION
    l_cols = int((l_east - l_west) // cell)
    l_rows = int((l_north - l_south) // cell)
    left_grid = np.zeros((l_rows, l_cols), dtype=np.float32)
    # The loud 70 dB cell sits at easting ~105 (just across the seam) and
    # northing ~25; the postcode point is at easting 75 in the left tile.
    row_of_loud = l_rows - 1 - int((25 - l_south) // cell)
    col_of_loud = int((105 - l_west) // cell)
    left_grid[row_of_loud, col_of_loud] = 70.0
    left_path = tmp_path / "left.tif"
    _write_geotiff(left_path, left_grid, l_west, l_north, cell, nodata=0)

    # --- Right tile ------------------------------------------------------
    # It holds the same loud cell (first column) but the postcode point is
    # NOT inside it, so without overlap the value would be lost for that point.
    r_west, r_south = 100, 0
    r_east, r_north = footprints[(r_west, r_south)]
    r_cols = int((r_east - r_west) // cell)
    r_rows = int((r_north - r_south) // cell)
    right_grid = np.zeros((r_rows, r_cols), dtype=np.float32)
    right_grid[r_rows - 1 - int((25 - r_south) // cell), 0] = 70.0
    right_path = tmp_path / "right.tif"
    _write_geotiff(right_path, right_grid, r_west, r_north, cell, nodata=0)

    sampled = noise.sample_noise_at_postcodes(
        [left_path, right_path],
        easting=np.array([75.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert sampled.to_list() == [70.0]
|
|
|
|
|
|
def test_sample_noise_distinguishes_nodata_from_in_coverage_quiet(
    monkeypatch, tmp_path
):
    """Nodata, in-coverage quiet and a real reading must map to distinct outputs.

    Defra encodes TRUE nodata as the -96.0 sentinel; genuinely quiet ground
    below the lowest reporting band is 0.0. With a 0 m radius each postcode
    reads exactly one cell, so behaviour can be pinned per cell:

        -96.0 sentinel  -> null ("we don't know")
        0.0 in-coverage -> NOISE_QUIET_FLOOR_DB ("we know it's quiet")
        65.0            -> 65.0 (a real modelled reading)
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    raster_path = tmp_path / "noise.tif"
    single_row = np.array([[-96.0, 0.0, 65.0]], dtype=np.float32)
    _write_geotiff(raster_path, single_row, 0, 10, 10, nodata=-96.0)

    # Cell centres at easting 5 (nodata), 15 (quiet 0.0), 25 (loud 65).
    eastings = np.array([5.0, 15.0, 25.0])
    northings = np.array([5.0, 5.0, 5.0])
    sampled = noise.sample_noise_at_postcodes(
        [raster_path],
        easting=eastings,
        northing=northings,
        label="Road",
        col_name="road_noise_lden_db",
    )

    expected = [None, float(noise.NOISE_QUIET_FLOOR_DB), 65.0]
    assert sampled.to_list() == expected
|
|
|
|
|
|
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(
    monkeypatch, tmp_path
):
    """Genuine readings at or above the quiet floor must pass through unchanged.

    The lowest Defra reporting band is 40.0 dB; genuine readings populate
    [40, ~80]. A genuine in-coverage reading at or just above the floor must
    be PRESERVED, not clamped UP to the floor; only true-quiet 0.0 is floored.
    A quiet floor set too high (e.g. 45) would inflate the ~35% of real
    40-44.99 dB readings; this pins that they survive unchanged.
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    quiet_floor = float(noise.NOISE_QUIET_FLOOR_DB)
    raster_path = tmp_path / "noise.tif"
    single_row = np.array([[42.0, quiet_floor, 0.0]], dtype=np.float32)
    _write_geotiff(raster_path, single_row, 0, 10, 10, nodata=-96.0)

    # Cell centres at easting 5 (42 dB), 15 (floor), 25 (quiet 0.0).
    eastings = np.array([5.0, 15.0, 25.0])
    northings = np.array([5.0, 5.0, 5.0])
    sampled = noise.sample_noise_at_postcodes(
        [raster_path],
        easting=eastings,
        northing=northings,
        label="Road",
        col_name="road_noise_lden_db",
    )

    # 42 preserved (NOT raised to the floor), floor preserved, 0.0 -> floor.
    assert sampled.to_list() == [42.0, quiet_floor, quiet_floor]
    # The floor must sit at/below the lowest genuine reading so nothing inflates.
    assert quiet_floor <= 42.0
|
|
|
|
|
|
def test_sample_noise_nodata_window_stays_null(monkeypatch, tmp_path):
    """A postcode surrounded only by nodata cells must stay null.

    A postcode whose entire max-window is the -96.0 sentinel must remain
    null: no in-coverage cell was read, so "quiet" must NOT be inferred.
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    raster_path = tmp_path / "noise.tif"
    grid = np.full((5, 5), -96.0, dtype=np.float32)
    grid[4, 4] = 70.0  # one loud cell, far from the nodata corner
    _write_geotiff(raster_path, grid, 0, 50, 10, nodata=-96.0)

    # Top-left point: its 3x3 window is cells (rows 0-1, cols 0-1) = all -96.
    sampled = noise.sample_noise_at_postcodes(
        [raster_path],
        easting=np.array([5.0]),
        northing=np.array([45.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert sampled.to_list() == [None]
|
|
|
|
|
|
def test_sample_noise_default_radius_samples_at_point_not_window(monkeypatch, tmp_path):
    """With the default radius, sampling reads the postcode's own cell only.

    Regression: production samples noise at the postcode's own 10m cell
    (POSTCODE_NOISE_RADIUS_M == 0), NOT a max-of-window that would grab the
    loudest nearby road cell and inflate every postcode's noise by ~+9 dB.
    """
    monkeypatch.setattr(noise, "RESOLUTION", 10)
    # The radius is deliberately NOT patched: the default itself is under test.
    assert noise.POSTCODE_NOISE_RADIUS_M == 0

    # Cell 0 = quiet (at the 40 dB floor), cell 1 = loud road (70), adjacent.
    raster_path = tmp_path / "noise.tif"
    quiet_then_loud = np.array([[40.0, 70.0]], dtype=np.float32)
    _write_geotiff(raster_path, quiet_then_loud, 0, 10, 10, nodata=-96.0)

    # Cell centres: easting 5 -> quiet cell 0; the loud cell 1 is at 15.
    sampled = noise.sample_noise_at_postcodes(
        [raster_path],
        easting=np.array([5.0]),
        northing=np.array([5.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    # Point sampling reads the quiet own-cell (40), not the loud neighbour (70).
    assert sampled.to_list() == [40.0]
|
|
|
|
|
|
def test_sample_noise_at_postcodes_uses_local_maximum(monkeypatch, tmp_path):
    """With a non-zero radius the sampled value is the loudest nearby cell.

    The raster is 5x5 at 10 m resolution with its top-left corner at (0, 50).
    The sample point (25, 25) falls in the cell holding 55, but the test
    expects the louder diagonal neighbour (70) to be returned for a 15 m radius.
    """
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)
    tile_path = tmp_path / "noise.tif"
    data = np.array(
        [
            [0, 0, 0, 0, 0],
            [0, 70, 0, 0, 0],
            [0, 0, 55, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
        ],
        dtype=np.float32,
    )
    # Use the module's shared writer instead of repeating the rasterio.open
    # profile inline; the arguments reproduce the same transform and nodata.
    _write_geotiff(tile_path, data, 0, 50, 10, nodata=0)

    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=np.array([25.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [70.0]
|