try
This commit is contained in:
parent
843d14b7ba
commit
c938b71904
13 changed files with 698 additions and 109 deletions
|
|
@ -1,7 +1,7 @@
|
|||
"""Download Defra Round 4 (2022) strategic noise data for England.
|
||||
|
||||
Downloads modelled noise levels (road, rail, airport) as GeoTIFF rasters via
|
||||
WCS, then samples the local maximum around each postcode representative point.
|
||||
WCS, then samples the 10m cell at each postcode representative point.
|
||||
Outputs a parquet file with postcode-level noise in dB for each source.
|
||||
|
||||
Uses smaller 20km tiles at native 10m resolution so values are not understated
|
||||
|
|
@ -98,15 +98,21 @@ NOISE_NODATA_SENTINEL = np.float32(-96.0)
|
|||
# NOISE_COLOR_STOPS[0]) — a rendering threshold, not the data's reporting floor.
|
||||
NOISE_QUIET_FLOOR_DB = np.float32(40.0)
|
||||
|
||||
# The pipeline has postcode representative points rather than complete unit
|
||||
# polygons here. Use a small local footprint and take the maximum 10m cell so
|
||||
# postcode-level noise is not understated by centroid rounding.
|
||||
POSTCODE_NOISE_RADIUS_M = 50
|
||||
# Sample noise at the postcode representative point itself (no neighbourhood
|
||||
# window). A 50m MAX-of-window grabbed the single loudest 10m cell within ~1.2 ha
|
||||
# of every postcode; because Defra road contours hug every modelled road and
|
||||
# representative points sit on/near streets, that inflated postcode noise by
|
||||
# roughly +9 dB (log scale) — making ~94% of England read >=55 dB Lden and
|
||||
# collapsing the metric's discrimination at the quiet end. Radius 0 ->
|
||||
# filter_size 1 -> the maximum_filter is skipped and each postcode reads the
|
||||
# 10m cell it actually sits in.
|
||||
POSTCODE_NOISE_RADIUS_M = 0
|
||||
|
||||
# Adjacent download tiles must overlap by at least the sampling radius so every
|
||||
# postcode's 50m max-window is fully contained in at least one tile. Without
|
||||
# this, a loud pixel just across a tile seam is invisible to a postcode on the
|
||||
# far side, under-reporting noise near seams.
|
||||
# Adjacent download tiles overlap by the sampling radius so every postcode's
|
||||
# sampling footprint is fully contained in at least one tile. With point
|
||||
# sampling (radius 0) this is 0 — a representative point falls inside exactly
|
||||
# one tile — but the relationship is kept so any future non-zero radius keeps
|
||||
# its window seam-safe.
|
||||
TILE_OVERLAP_M = POSTCODE_NOISE_RADIUS_M
|
||||
|
||||
# Retry/split behaviour for slow Defra WCS requests. Some 100km eastern tiles
|
||||
|
|
@ -413,8 +419,13 @@ def sample_noise_at_postcodes(
|
|||
label: str,
|
||||
col_name: str,
|
||||
) -> pl.Series:
|
||||
"""Sample max noise values from 10m tiles around postcode representative points."""
|
||||
print(f"[{label}] Sampling max noise values from {len(tile_paths)} tiles...")
|
||||
"""Sample noise from 10m tiles at postcode representative points.
|
||||
|
||||
With POSTCODE_NOISE_RADIUS_M == 0 (the default) each postcode reads the
|
||||
single 10m cell it sits in; a larger radius takes the max over the
|
||||
surrounding window.
|
||||
"""
|
||||
print(f"[{label}] Sampling noise values from {len(tile_paths)} tiles...")
|
||||
noise_db = np.full(len(easting), np.nan, dtype=np.float32)
|
||||
radius_cells = max(0, math.ceil(POSTCODE_NOISE_RADIUS_M / RESOLUTION))
|
||||
filter_size = radius_cells * 2 + 1
|
||||
|
|
|
|||
|
|
@ -126,19 +126,23 @@ def test_download_raster_raises_on_missing_strict_tiles(monkeypatch, tmp_path):
|
|||
|
||||
|
||||
def test_generate_tiles_neighbours_overlap_by_radius():
|
||||
# Use an explicit non-zero overlap so the assertion verifies a real positive
|
||||
# overlap. The production radius is 0 (point sampling), which would make this
|
||||
# a vacuous ">= 0" check; this keeps the seam-safety guard meaningful for any
|
||||
# future non-zero sampling radius.
|
||||
tile_size = 20_000
|
||||
overlap = noise.POSTCODE_NOISE_RADIUS_M
|
||||
tiles = noise._generate_tiles(
|
||||
0, 60_000, 0, 60_000, tile_size, overlap, tile_size
|
||||
)
|
||||
overlap = 50
|
||||
tiles = noise._generate_tiles(0, 60_000, 0, 60_000, tile_size, overlap, tile_size)
|
||||
|
||||
by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}
|
||||
|
||||
saw_horizontal_overlap = False
|
||||
# Horizontally adjacent tiles must overlap by >= overlap.
|
||||
for (min_e, min_n), (max_e, _max_n) in by_origin.items():
|
||||
right_origin = (min_e + tile_size, min_n)
|
||||
if right_origin in by_origin:
|
||||
assert max_e - right_origin[0] >= overlap
|
||||
saw_horizontal_overlap = True
|
||||
|
||||
# Vertically adjacent tiles must overlap by >= overlap.
|
||||
for (min_e, min_n), (_max_e, max_n) in by_origin.items():
|
||||
|
|
@ -146,6 +150,8 @@ def test_generate_tiles_neighbours_overlap_by_radius():
|
|||
if up_origin in by_origin:
|
||||
assert max_n - up_origin[1] >= overlap
|
||||
|
||||
assert saw_horizontal_overlap # the fixture actually has adjacent tiles
|
||||
|
||||
|
||||
def test_generate_tiles_clamps_to_grid_extent():
|
||||
tile_size = 20_000
|
||||
|
|
@ -193,9 +199,7 @@ def test_sample_noise_recovers_value_across_overlapping_seam(monkeypatch, tmp_pa
|
|||
tile_size = 100
|
||||
overlap = noise.POSTCODE_NOISE_RADIUS_M
|
||||
tiles = noise._generate_tiles(0, 200, 0, 100, tile_size, overlap, tile_size)
|
||||
by_origin = {
|
||||
(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles
|
||||
}
|
||||
by_origin = {(min_e, min_n): (max_e, max_n) for min_e, min_n, max_e, max_n in tiles}
|
||||
left_min_e, left_min_n = 0, 0
|
||||
left_max_e, left_max_n = by_origin[(left_min_e, left_min_n)]
|
||||
# Overlap fix is what makes the left tile reach across the seam.
|
||||
|
|
@ -269,7 +273,9 @@ def test_sample_noise_distinguishes_nodata_from_in_coverage_quiet(
|
|||
assert result.to_list() == [None, float(noise.NOISE_QUIET_FLOOR_DB), 65.0]
|
||||
|
||||
|
||||
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(monkeypatch, tmp_path):
|
||||
def test_sample_noise_preserves_genuine_reading_above_quiet_floor(
|
||||
monkeypatch, tmp_path
|
||||
):
|
||||
monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 0)
|
||||
monkeypatch.setattr(noise, "RESOLUTION", 10)
|
||||
|
||||
|
|
@ -324,6 +330,30 @@ def test_sample_noise_nodata_window_stays_null(monkeypatch, tmp_path):
|
|||
assert result.to_list() == [None]
|
||||
|
||||
|
||||
def test_sample_noise_default_radius_samples_at_point_not_window(monkeypatch, tmp_path):
|
||||
# Regression: production samples noise at the postcode's own 10m cell
|
||||
# (POSTCODE_NOISE_RADIUS_M == 0), NOT a max-of-window that would grab the
|
||||
# loudest nearby road cell and inflate every postcode's noise by ~+9 dB.
|
||||
monkeypatch.setattr(noise, "RESOLUTION", 10)
|
||||
assert noise.POSTCODE_NOISE_RADIUS_M == 0
|
||||
|
||||
# Cell 0 = quiet (at the 40 dB floor), cell 1 = loud road (70), adjacent.
|
||||
data = np.array([[40.0, 70.0]], dtype=np.float32)
|
||||
_write_geotiff(tmp_path / "noise.tif", data, 0, 10, 10, nodata=-96.0)
|
||||
|
||||
result = noise.sample_noise_at_postcodes(
|
||||
[tmp_path / "noise.tif"],
|
||||
# Cell centres: easting 5 -> quiet cell 0; the loud cell 1 is at 15.
|
||||
easting=np.array([5.0]),
|
||||
northing=np.array([5.0]),
|
||||
label="Road",
|
||||
col_name="road_noise_lden_db",
|
||||
)
|
||||
|
||||
# Point sampling reads the quiet own-cell (40), not the loud neighbour (70).
|
||||
assert result.to_list() == [40.0]
|
||||
|
||||
|
||||
def test_sample_noise_at_postcodes_uses_local_maximum(monkeypatch, tmp_path):
|
||||
monkeypatch.setattr(noise, "POSTCODE_NOISE_RADIUS_M", 15)
|
||||
monkeypatch.setattr(noise, "RESOLUTION", 10)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue