"""Tests for ``pipeline.download.noise``: tile download, tiling and sampling."""

import httpx
import numpy as np
import pytest
import rasterio
from rasterio.io import MemoryFile
from rasterio.transform import from_origin

from pipeline.download import noise


def _tiny_geotiff_bytes() -> bytes:
    """Return the bytes of an in-memory 1x1 uint8 GeoTIFF (EPSG:27700)."""
    data = np.array([[55]], dtype=np.uint8)
    with MemoryFile() as memfile:
        with memfile.open(
            driver="GTiff",
            height=data.shape[0],
            width=data.shape[1],
            count=1,
            dtype=data.dtype,
            crs="EPSG:27700",
            transform=from_origin(0, 1, 1, 1),
        ) as dataset:
            dataset.write(data, 1)
        # Read the buffer only after the dataset is closed so the write has
        # been flushed into the MemoryFile.
        return bytes(memfile.getbuffer())


def _patch_single_tile_grid(monkeypatch):
    """Shrink the module's grid constants to one 100x100 tile at the origin."""
    monkeypatch.setattr(noise, "BNG_MIN_E", 0)
    monkeypatch.setattr(noise, "BNG_MAX_E", 100)
    monkeypatch.setattr(noise, "BNG_MIN_N", 0)
    monkeypatch.setattr(noise, "BNG_MAX_N", 100)
    monkeypatch.setattr(noise, "TILE_SIZE", 100)


def test_download_tile_splits_after_retries(monkeypatch, tmp_path):
    """A 100x100 tile that always times out is split into four 50x50 tiles."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 50)
    tile_bytes = _tiny_geotiff_bytes()

    def fake_fetch_tile_bytes(
        wcs_base,
        coverage_id,
        min_e,
        min_n,
        max_e,
        max_n,
        wcs_version="1.0.0",
    ):
        # Only requests no larger than 50 units on each axis succeed.
        if max_e - min_e > 50 or max_n - min_n > 50:
            raise httpx.TimeoutException("too large")
        return tile_bytes

    monkeypatch.setattr(noise, "_fetch_tile_bytes", fake_fetch_tile_bytes)

    paths, failures = noise._download_tile("base", "coverage", 0, 0, 100, 100, tmp_path)

    assert failures == []
    assert len(paths) == 4
    assert sorted(path.name for path in paths) == [
        "tile_0_0_50_50.tif",
        "tile_0_50_50_100.tif",
        "tile_50_0_100_50.tif",
        "tile_50_50_100_100.tif",
    ]


def test_download_tile_reports_unsplittable_failure(monkeypatch, tmp_path):
    """A tile already at MIN_TILE_SIZE that cannot be fetched is reported."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 100)

    def fake_fetch_tile_bytes(*args, **kwargs):
        raise httpx.ConnectError("offline")

    monkeypatch.setattr(noise, "_fetch_tile_bytes", fake_fetch_tile_bytes)

    paths, failures = noise._download_tile("base", "coverage", 0, 0, 100, 100, tmp_path)

    assert paths == []
    assert failures == [(0, 0, 100, 100)]


def test_download_tile_treats_non_tiff_response_as_failure(monkeypatch, tmp_path):
    """A ``NoGeoTiffError`` from the fetcher is reported as a failed tile."""
    monkeypatch.setattr(noise, "MAX_RETRIES", 1)
    monkeypatch.setattr(noise, "MIN_TILE_SIZE", 100)

    def fake_fetch_tile_bytes(*args, **kwargs):
        raise noise.NoGeoTiffError("xml exception")

    monkeypatch.setattr(noise, "_fetch_tile_bytes", fake_fetch_tile_bytes)

    paths, failures = noise._download_tile("base", "coverage", 0, 0, 100, 100, tmp_path)

    assert paths == []
    assert failures == [(0, 0, 100, 100)]


def test_download_raster_tolerates_missing_tiles_when_allowed(monkeypatch, tmp_path):
    """With ``allow_missing_tiles=True`` a failed tile yields no paths, no error."""
    _patch_single_tile_grid(monkeypatch)

    def fake_download_tile(*args, **kwargs):
        return [], [(0, 0, 100, 100)]

    monkeypatch.setattr(noise, "_download_tile", fake_download_tile)

    paths = noise.download_raster(
        tmp_path,
        "base",
        "coverage",
        "Airport",
        allow_missing_tiles=True,
    )

    assert paths == []


def test_download_raster_raises_on_missing_strict_tiles(monkeypatch, tmp_path):
    """Without ``allow_missing_tiles`` a failed tile raises ``RuntimeError``."""
    _patch_single_tile_grid(monkeypatch)

    def fake_download_tile(*args, **kwargs):
        return [], [(0, 0, 100, 100)]

    monkeypatch.setattr(noise, "_download_tile", fake_download_tile)

    with pytest.raises(RuntimeError, match=r"\[Road\] Failed to download"):
        noise.download_raster(tmp_path, "base", "coverage", "Road")


def test_generate_tiles_neighbours_overlap_by_radius():
    """Adjacent tiles overlap by at least the requested overlap on both axes."""
    # Use an explicit non-zero overlap so the assertion verifies a real positive
    # overlap. The production radius is 0 (point sampling), which would make this
    # a vacuous ">= 0" check; this keeps the seam-safety guard meaningful for any
    # future non-zero sampling radius.
    tile_size = 20_000
    overlap = 50
    tiles = noise._generate_tiles(0, 60_000, 0, 60_000, tile_size, overlap, tile_size)
    by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}

    saw_horizontal_overlap = False
    saw_vertical_overlap = False
    for (min_e, min_n), (max_e, max_n) in by_origin.items():
        # Horizontally adjacent tiles must overlap by >= overlap.
        right_origin = (min_e + tile_size, min_n)
        if right_origin in by_origin:
            assert max_e - right_origin[0] >= overlap
            saw_horizontal_overlap = True

        # Vertically adjacent tiles must overlap by >= overlap.
        up_origin = (min_e, min_n + tile_size)
        if up_origin in by_origin:
            assert max_n - up_origin[1] >= overlap
            saw_vertical_overlap = True

    # The fixture actually has adjacent tiles in both directions, so neither
    # check above passed vacuously.
    assert saw_horizontal_overlap
    assert saw_vertical_overlap


def test_generate_tiles_clamps_to_grid_extent():
    """Tiles touching the grid's top-right corner never extend past the extent."""
    tile_size = 20_000
    # The production radius is 0 (point sampling), for which no tile would try
    # to cross the extent at all. Also exercise an explicit non-zero overlap so
    # the clamp is genuinely tested.
    for overlap in (noise.POSTCODE_NOISE_RADIUS_M, 50):
        tiles = noise._generate_tiles(
            noise.BNG_MAX_E - tile_size,
            noise.BNG_MAX_E,
            noise.BNG_MAX_N - tile_size,
            noise.BNG_MAX_N,
            tile_size,
            overlap,
            tile_size,
        )
        # Guard against the loop below passing on an empty tile list.
        assert tiles
        # The final (top-right) tile cannot extend past the England extent even
        # though the overlap would otherwise push it beyond.
        for _min_e, _min_n, max_e, max_n in tiles:
            assert max_e <= noise.BNG_MAX_E
            assert max_n <= noise.BNG_MAX_N


def _write_geotiff(path, data: np.ndarray, left, top, resolution, nodata) -> None:
    """Write ``data`` as a single-band EPSG:27700 GeoTIFF at ``path``.

    Args:
        path: Destination file path.
        data: 2-D array written to band 1; its shape and dtype define the raster.
        left: Easting of the raster's left edge.
        top: Northing of the raster's top edge.
        resolution: Square cell size, used for both axes.
        nodata: Value recorded as the raster's nodata marker.
    """
    with rasterio.open(
        path,
        "w",
        driver="GTiff",
        height=data.shape[0],
        width=data.shape[1],
        count=1,
        dtype=data.dtype,
        crs="EPSG:27700",
        transform=from_origin(left, top, resolution, resolution),
        nodata=nodata,
    ) as dataset:
        dataset.write(data, 1)


def test_sample_noise_recovers_value_across_overlapping_seam(monkeypatch, tmp_path):
    """A loud cell just across a tile seam is still seen from the left tile."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 50)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # Two download tiles share a vertical seam at easting=100. _generate_tiles
    # decides their real footprints: with the overlap fix the LEFT tile extends
    # past the seam by POSTCODE_NOISE_RADIUS_M and thus covers a loud cell that
    # physically sits just across the seam.
    tile_size = 100
    overlap = noise.POSTCODE_NOISE_RADIUS_M
    tiles = noise._generate_tiles(0, 200, 0, 100, tile_size, overlap, tile_size)
    by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}

    left_min_e, left_min_n = 0, 0
    left_max_e, left_max_n = by_origin[(left_min_e, left_min_n)]
    # Overlap fix is what makes the left tile reach across the seam.
    assert left_max_e > 100

    # The loud 70 dB cell centre is at easting 105 (just across the seam) and
    # the postcode point is at easting 75 in the left tile, within 50m of it.
    res = noise.RESOLUTION
    width = int((left_max_e - left_min_e) // res)
    height = int((left_max_n - left_min_n) // res)
    left_data = np.zeros((height, width), dtype=np.float32)
    loud_row = height - 1 - int((25 - left_min_n) // res)  # northing ~25
    loud_col = int((105 - left_min_e) // res)  # easting ~105
    left_data[loud_row, loud_col] = 70.0
    _write_geotiff(
        tmp_path / "left.tif", left_data, left_min_e, left_max_n, res, nodata=0
    )

    # The right tile holds the same loud cell but the postcode point is NOT
    # inside it, so without overlap the value would be lost for that point.
    right_min_e, right_min_n = 100, 0
    right_max_e, right_max_n = by_origin[(right_min_e, right_min_n)]
    rwidth = int((right_max_e - right_min_e) // res)
    rheight = int((right_max_n - right_min_n) // res)
    right_data = np.zeros((rheight, rwidth), dtype=np.float32)
    right_data[rheight - 1 - int((25 - right_min_n) // res), 0] = 70.0
    _write_geotiff(
        tmp_path / "right.tif", right_data, right_min_e, right_max_n, res, nodata=0
    )

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "left.tif", tmp_path / "right.tif"],
        easting=np.array([75.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [70.0]


def test_sample_noise_distinguishes_nodata_from_in_coverage_quiet(
    monkeypatch, tmp_path
):
    """The nodata sentinel maps to null while in-coverage 0.0 maps to the floor."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # Defra encodes TRUE nodata as the -96.0 sentinel; genuinely quiet ground
    # below the lowest reporting band is 0.0. With a 0m radius each postcode
    # reads exactly one cell, so we can pin behaviour per cell:
    #   -96.0 sentinel  -> null ("we don't know")
    #   0.0 in-coverage -> NOISE_QUIET_FLOOR_DB ("we know it's quiet")
    #   65.0            -> 65.0 (a real modelled reading)
    data = np.array(
        [
            [-96.0, 0.0, 65.0],
        ],
        dtype=np.float32,
    )
    _write_geotiff(tmp_path / "noise.tif", data, 0, 10, 10, nodata=-96.0)

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "noise.tif"],
        # Cell centres at easting 5 (nodata), 15 (quiet 0.0), 25 (loud 65).
        easting=np.array([5.0, 15.0, 25.0]),
        northing=np.array([5.0, 5.0, 5.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [None, float(noise.NOISE_QUIET_FLOOR_DB), 65.0]


def test_sample_noise_preserves_genuine_reading_above_quiet_floor(
    monkeypatch, tmp_path
):
    """Readings at or above the quiet floor pass through; only 0.0 is floored."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # The lowest Defra reporting band is 40.0 dB; genuine readings populate
    # [40, ~80]. A genuine in-coverage reading at or just above the floor must be
    # PRESERVED, not clamped UP to the floor — only true-quiet 0.0 is floored. A
    # quiet floor set too high (e.g. 45) would inflate the ~35% of real 40-44.99
    # dB readings; this pins that they survive unchanged.
    floor = float(noise.NOISE_QUIET_FLOOR_DB)
    data = np.array(
        [
            [42.0, floor, 0.0],
        ],
        dtype=np.float32,
    )
    _write_geotiff(tmp_path / "noise.tif", data, 0, 10, 10, nodata=-96.0)

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "noise.tif"],
        # Cell centres at easting 5 (42 dB), 15 (floor), 25 (quiet 0.0).
        easting=np.array([5.0, 15.0, 25.0]),
        northing=np.array([5.0, 5.0, 5.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    # 42 preserved (NOT raised to the floor), floor preserved, 0.0 -> floor.
    assert result.to_list() == [42.0, floor, floor]
    # The floor must sit at/below the lowest genuine reading so nothing inflates.
    assert floor <= 42.0


def test_sample_noise_nodata_window_stays_null(monkeypatch, tmp_path):
    """A sampling window made entirely of nodata cells yields null."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    # A postcode whose entire 3x3 max-window is the -96.0 sentinel must remain
    # null: no in-coverage cell was read, so "quiet" must NOT be inferred.
    data = np.full((5, 5), -96.0, dtype=np.float32)
    data[4, 4] = 70.0  # one loud cell, far from the nodata corner
    _write_geotiff(tmp_path / "noise.tif", data, 0, 50, 10, nodata=-96.0)

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "noise.tif"],
        # Top-left point: its 3x3 window is cells (rows 0-1, cols 0-1) = all -96.
        easting=np.array([5.0]),
        northing=np.array([45.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [None]


def test_sample_noise_default_radius_samples_at_point_not_window(monkeypatch, tmp_path):
    """With the default radius a postcode reads its own cell, not a neighbour."""
    # Regression: production samples noise at the postcode's own 10m cell
    # (POSTCODE_NOISE_RADIUS_M == 0), NOT a max-of-window that would grab the
    # loudest nearby road cell and inflate every postcode's noise by ~+9 dB.
    monkeypatch.setattr(noise, "RESOLUTION", 10)
    assert noise.POSTCODE_NOISE_RADIUS_M == 0

    # Cell 0 = quiet (at the 40 dB floor), cell 1 = loud road (70), adjacent.
    data = np.array([[40.0, 70.0]], dtype=np.float32)
    _write_geotiff(tmp_path / "noise.tif", data, 0, 10, 10, nodata=-96.0)

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "noise.tif"],
        # Cell centres: easting 5 -> quiet cell 0; the loud cell 1 is at 15.
        easting=np.array([5.0]),
        northing=np.array([5.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    # Point sampling reads the quiet own-cell (40), not the loud neighbour (70).
    assert result.to_list() == [40.0]


def test_sample_noise_at_postcodes_uses_local_maximum(monkeypatch, tmp_path):
    """With a 15 m radius the louder neighbouring cell wins over the own cell."""
    monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
    monkeypatch.setattr(noise, "RESOLUTION", 10)

    tile_path = tmp_path / "noise.tif"
    # The sampled point (25, 25) falls in the 55 dB cell; the 70 dB cell is its
    # diagonal neighbour.
    data = np.array(
        [
            [0, 0, 0, 0, 0],
            [0, 70, 0, 0, 0],
            [0, 0, 55, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
        ],
        dtype=np.float32,
    )
    _write_geotiff(tile_path, data, 0, 50, 10, nodata=0)

    result = noise.sample_noise_at_postcodes(
        [tile_path],
        easting=np.array([25.0]),
        northing=np.array([25.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    assert result.to_list() == [70.0]