try
Some checks failed
CI / Check (push) Failing after 3m22s
Build and publish Docker image / build-and-push (push) Successful in 7m25s

This commit is contained in:
Andras Schmelczer 2026-06-04 22:34:26 +01:00
parent 843d14b7ba
commit c938b71904
13 changed files with 698 additions and 109 deletions

View file

@@ -1,7 +1,7 @@
"""Download Defra Round 4 (2022) strategic noise data for England.
Downloads modelled noise levels (road, rail, airport) as GeoTIFF rasters via
WCS, then samples the local maximum around each postcode representative point.
WCS, then samples the 10m cell at each postcode representative point.
Outputs a parquet file with postcode-level noise in dB for each source.
Uses smaller 20km tiles at native 10m resolution so values are not understated
@@ -98,15 +98,21 @@ NOISE_NODATA_SENTINEL = np.float32(-96.0)
# NOISE_COLOR_STOPS[0]) — a rendering threshold, not the data's reporting floor.
NOISE_QUIET_FLOOR_DB = np.float32(40.0)
# The pipeline has postcode representative points rather than complete unit
# polygons here. Use a small local footprint and take the maximum 10m cell so
# postcode-level noise is not understated by centroid rounding.
POSTCODE_NOISE_RADIUS_M = 50
# Sample noise at the postcode representative point itself (no neighbourhood
# window). A 50m MAX-of-window grabbed the single loudest 10m cell within ~1.2 ha
# of every postcode; because Defra road contours hug every modelled road and
# representative points sit on/near streets, that inflated postcode noise by
# roughly +9 dB (log scale) — making ~94% of England read >=55 dB Lden and
# collapsing the metric's discrimination at the quiet end. Radius 0 ->
# filter_size 1 -> the maximum_filter is skipped and each postcode reads the
# 10m cell it actually sits in.
POSTCODE_NOISE_RADIUS_M = 0
# Adjacent download tiles must overlap by at least the sampling radius so every
# postcode's 50m max-window is fully contained in at least one tile. Without
# this, a loud pixel just across a tile seam is invisible to a postcode on the
# far side, under-reporting noise near seams.
# Adjacent download tiles overlap by the sampling radius so every postcode's
# sampling footprint is fully contained in at least one tile. With point
# sampling (radius 0) this is 0 — a representative point falls inside exactly
# one tile — but the relationship is kept so any future non-zero radius keeps
# its window seam-safe.
TILE_OVERLAP_M = POSTCODE_NOISE_RADIUS_M
# Retry/split behaviour for slow Defra WCS requests. Some 100km eastern tiles
@@ -413,8 +419,13 @@ def sample_noise_at_postcodes(
label: str,
col_name: str,
) -> pl.Series:
"""Sample max noise values from 10m tiles around postcode representative points."""
print(f"[{label}] Sampling max noise values from {len(tile_paths)} tiles...")
"""Sample noise from 10m tiles at postcode representative points.
With POSTCODE_NOISE_RADIUS_M == 0 (the default) each postcode reads the
single 10m cell it sits in; a larger radius reduces to a max over the
surrounding window.
"""
print(f"[{label}] Sampling noise values from {len(tile_paths)} tiles...")
noise_db = np.full(len(easting), np.nan, dtype=np.float32)
radius_cells = max(0, math.ceil(POSTCODE_NOISE_RADIUS_M / RESOLUTION))
filter_size = radius_cells * 2 + 1

View file

@@ -126,19 +126,23 @@ def test_download_raster_raises_on_missing_strict_tiles(monkeypatch, tmp_path):
def test_generate_tiles_neighbours_overlap_by_radius():
# Use an explicit non-zero overlap so the assertion verifies a real positive
# overlap. The production radius is 0 (point sampling), which would make this
# a vacuous ">= 0" check; this keeps the seam-safety guard meaningful for any
# future non-zero sampling radius.
tile_size = 20_000
overlap = noise.POSTCODE_NOISE_RADIUS_M
tiles = noise._generate_tiles(
0, 60_000, 0, 60_000, tile_size, overlap, tile_size
)
overlap = 50
tiles = noise._generate_tiles(0, 60_000, 0, 60_000, tile_size, overlap, tile_size)
by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}
saw_horizontal_overlap = False
# Horizontally adjacent tiles must overlap by >= overlap.
for (min_e, min_n), (max_e, _max_n) in by_origin.items():
right_origin = (min_e + tile_size, min_n)
if right_origin in by_origin:
assert max_e - right_origin[0] >= overlap
saw_horizontal_overlap = True
# Vertically adjacent tiles must overlap by >= overlap.
for (min_e, min_n), (_max_e, max_n) in by_origin.items():
@@ -146,6 +150,8 @@ def test_generate_tiles_neighbours_overlap_by_radius():
if up_origin in by_origin:
assert max_n - up_origin[1] >= overlap
assert saw_horizontal_overlap # the fixture actually has adjacent tiles
def test_generate_tiles_clamps_to_grid_extent():
tile_size = 20_000
@@ -193,9 +199,7 @@ def test_sample_noise_recovers_value_across_overlapping_seam(monkeypatch, tmp_pa
tile_size = 100
overlap = noise.POSTCODE_NOISE_RADIUS_M
tiles = noise._generate_tiles(0, 200, 0, 100, tile_size, overlap, tile_size)
by_origin = {
(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles
}
by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}
left_min_e, left_min_n = 0, 0
left_max_e, left_max_n = by_origin[(left_min_e, left_min_n)]
# Overlap fix is what makes the left tile reach across the seam.
@@ -269,7 +273,9 @@ def test_sample_noise_distinguishes_nodata_from_in_coverage_quiet(
assert result.to_list() == [None, float(noise.NOISE_QUIET_FLOOR_DB), 65.0]
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(monkeypatch, tmp_path):
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(
monkeypatch, tmp_path
):
monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
monkeypatch.setattr(noise, "RESOLUTION", 10)
@@ -324,6 +330,30 @@ def test_sample_noise_nodata_window_stays_null(monkeypatch, tmp_path):
assert result.to_list() == [None]
def test_sample_noise_default_radius_samples_at_point_not_window(monkeypatch, tmp_path):
    """Default radius reads the postcode's own 10m cell, not a window maximum.

    Regression: production samples noise at the postcode's own 10m cell
    (POSTCODE_NOISE_RADIUS_M == 0), NOT a max-of-window that would grab the
    loudest nearby road cell and inflate every postcode's noise by ~+9 dB.
    """
    monkeypatch.setattr(noise, "RESOLUTION", 10)
    # Deliberately asserted rather than monkeypatched: the test must fail if the
    # production default radius ever stops being point sampling.
    assert noise.POSTCODE_NOISE_RADIUS_M == 0

    # Cell 0 = quiet (at the 40 dB floor), cell 1 = loud road (70), adjacent.
    data = np.array([[40.0, 70.0]], dtype=np.float32)
    _write_geotiff(tmp_path / "noise.tif", data, 0, 10, 10, nodata=-96.0)

    result = noise.sample_noise_at_postcodes(
        [tmp_path / "noise.tif"],
        # Cell centres: easting 5 -> quiet cell 0; the loud cell 1 is at 15.
        easting=np.array([5.0]),
        northing=np.array([5.0]),
        label="Road",
        col_name="road_noise_lden_db",
    )

    # Point sampling reads the quiet own-cell (40), not the loud neighbour (70).
    assert result.to_list() == [40.0]
def test_sample_noise_at_postcodes_uses_local_maximum(monkeypatch, tmp_path):
monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
monkeypatch.setattr(noise, "RESOLUTION", 10)