"""Download Defra Round 4 (2022) strategic noise data for England. Downloads modelled noise levels (road, rail, airport) as GeoTIFF rasters via WCS, then samples noise values at postcode centroids. Outputs a parquet file with postcode-level noise in dB for each source. Uses 100km tiles (~42 per source) to balance request size vs count. The server times out on tiles larger than ~150km at 100m resolution. Data source: Defra Strategic Noise Mapping Round 4 (2022) - Lden = day-evening-night 24h weighted average (the EU standard metric) - 10m grid, modelled at 4m above ground License: Open Government Licence v3.0 Note: Road/rail use WCS 1.0.0; airport requires WCS 2.0.1 (Defra's 1.0.0 endpoint is broken for that coverage). """ import argparse import tempfile from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path import httpx import numpy as np import polars as pl import rasterio from pyproj import Transformer from rasterio.merge import merge from rasterio.transform import rowcol # Noise sources: (label, column_name, WCS base URL, coverage ID, WCS version) # Road/rail work with WCS 1.0.0; airport requires WCS 2.0.1. NOISE_SOURCES = [ ( "Road", "road_noise_lden_db", "https://environment.data.gov.uk/spatialdata/road-noise-all-metrics-england-round-4/wcs", "Road_Noise_Lden_England_Round_4_All", "1.0.0", ), ( "Rail", "rail_noise_lden_db", "https://environment.data.gov.uk/spatialdata/noise-data/wcs", "Rail_Noise_Lden_England_Round_4_All", "1.0.0", ), ( "Airport", "airport_noise_lden_db", "https://environment.data.gov.uk/spatialdata/airport-noise-all-metrics-england-round-4/wcs", "dac9cba4-abe7-43bd-b8e9-8a83da52edd8__Airport_Noise_ALL_Lden", "2.0.1", ), ] # England extent in EPSG:27700 (British National Grid), rounded outward BNG_MIN_E = 80_000 BNG_MAX_E = 660_000 BNG_MIN_N = 0 BNG_MAX_N = 660_000 # Tile size in metres (100km balances request size vs count; 300km causes 504s) TILE_SIZE = 100_000 # Max concurrent tile downloads MAX_WORKERS = 4 # Native raster resolution (10m grid) NATIVE_RESOLUTION = 10 # Request pixel resolution in metres (100m is sufficient for postcode-level data # and keeps download size ~100x smaller than native 10m) RESOLUTION = 100 def _wcs_get_coverage_url( wcs_base: str, coverage_id: str, min_e: int, min_n: int, max_e: int, max_n: int, wcs_version: str = "1.0.0", ) -> str: """Build a WCS GetCoverage URL for a BNG bounding box.""" if wcs_version == "2.0.1": return ( f"{wcs_base}?" f"service=WCS&version=2.0.1&request=GetCoverage" f"&coverageId={coverage_id}" f"&format=image/tiff" f"&subsettingCRS=EPSG:27700" f"&subset=E({min_e},{max_e})" f"&subset=N({min_n},{max_n})" f"&scaleFactor={NATIVE_RESOLUTION / RESOLUTION}" ) width = (max_e - min_e) // RESOLUTION height = (max_n - min_n) // RESOLUTION return ( f"{wcs_base}?" f"service=WCS&version=1.0.0&request=GetCoverage" f"&coverage={coverage_id}" f"&CRS=EPSG:27700" f"&BBOX={min_e},{min_n},{max_e},{max_n}" f"&width={width}&height={height}" f"&format=GeoTIFF" ) _TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True) def _bng_from_latlon(lat: np.ndarray, lon: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Convert WGS84 lat/lon to British National Grid easting/northing.""" return _TO_BNG.transform(lon, lat) # pyproj takes (x=lon, y=lat) def _download_tile( wcs_base: str, coverage_id: str, min_e: int, min_n: int, max_e: int, max_n: int, tile_path: Path, wcs_version: str = "1.0.0", ) -> Path | None: """Download a single WCS tile. Returns path if successful, None otherwise.""" url = _wcs_get_coverage_url( wcs_base, coverage_id, min_e, min_n, max_e, max_n, wcs_version ) try: with httpx.Client(timeout=300, follow_redirects=True) as client: resp = client.get(url) resp.raise_for_status() content_type = resp.headers.get("content-type", "") if "tiff" not in content_type and resp.content[:4] not in ( b"II*\x00", b"MM\x00*", ): return None tile_path.write_bytes(resp.content) return tile_path except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e: print(f" Failed to download tile ({min_e},{min_n})-({max_e},{max_n}): {e}") return None def download_raster( tile_dir: Path, wcs_base: str, coverage_id: str, label: str, wcs_version: str = "1.0.0", ) -> list[Path]: """Download noise GeoTIFF raster covering England, returning paths to saved files.""" tiles = [] for min_e in range(BNG_MIN_E, BNG_MAX_E, TILE_SIZE): for min_n in range(BNG_MIN_N, BNG_MAX_N, TILE_SIZE): max_e = min(min_e + TILE_SIZE, BNG_MAX_E) max_n = min(min_n + TILE_SIZE, BNG_MAX_N) tiles.append((min_e, min_n, max_e, max_n)) print( f"[{label}] Downloading {len(tiles)} tiles at {RESOLUTION}m resolution ({MAX_WORKERS} workers)..." ) paths = [] completed = 0 with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: futures = {} for min_e, min_n, max_e, max_n in tiles: tile_path = tile_dir / f"tile_{min_e}_{min_n}.tif" fut = executor.submit( _download_tile, wcs_base, coverage_id, min_e, min_n, max_e, max_n, tile_path, wcs_version, ) futures[fut] = (min_e, min_n) for fut in as_completed(futures): completed += 1 result = fut.result() if result is not None: paths.append(result) print( f"\r [{completed}/{len(tiles)}] Downloaded {len(paths)} valid tiles", end="", flush=True, ) print(f"\n[{label}] Downloaded {len(paths)}/{len(tiles)} tiles") return paths def sample_noise_at_postcodes( tile_paths: list[Path], easting: np.ndarray, northing: np.ndarray, label: str, col_name: str, ) -> pl.Series: """Sample noise values from merged tiles at given BNG coordinates.""" print(f"[{label}] Merging {len(tile_paths)} tiles...") datasets = [rasterio.open(p) for p in tile_paths] raster_nodata = datasets[0].nodata mosaic, mosaic_transform = merge(datasets) for ds in datasets: ds.close() noise_grid = mosaic[0] print(f"[{label}] Sampling noise values at postcode centroids...") rows, cols = rowcol(mosaic_transform, easting, northing) rows = np.asarray(rows) cols = np.asarray(cols) h, w = noise_grid.shape in_bounds = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w) noise_db = np.full(len(easting), np.nan, dtype=np.float32) valid_rows = rows[in_bounds] valid_cols = cols[in_bounds] sampled = noise_grid[valid_rows, valid_cols].astype(np.float32) # Mark nodata and zero (unmapped areas) as NaN. # Road/rail use nodata=-96, airport uses nodata=3.4e38. if raster_nodata is not None: sampled[np.isclose(sampled, np.float32(raster_nodata), rtol=1e-5)] = np.nan sampled[sampled == 0] = np.nan noise_db[in_bounds] = sampled valid_count = int(np.sum(~np.isnan(noise_db))) print( f"[{label}] Sampled {valid_count:,} / {len(easting):,} postcodes with noise data" ) # Return as masked Series: use null (not NaN) so that Polars max_horizontal # correctly ignores missing values instead of propagating NaN. return pl.Series(col_name, noise_db).fill_nan(None) def main() -> None: parser = argparse.ArgumentParser( description="Download Defra noise data (road, rail, airport) and sample at postcode centroids" ) parser.add_argument( "--arcgis", type=Path, required=True, help="ArcGIS postcode data parquet (for lat/lon coordinates)", ) parser.add_argument( "--output", type=Path, required=True, help="Output parquet file path" ) args = parser.parse_args() args.output.parent.mkdir(parents=True, exist_ok=True) print("Loading postcode coordinates...") postcodes = pl.read_parquet(args.arcgis, columns=["pcds", "lat", "long"]).rename( {"pcds": "postcode", "long": "lon"} ) lat = postcodes["lat"].to_numpy() lon = postcodes["lon"].to_numpy() print("Converting lat/lon to BNG...") easting, northing = _bng_from_latlon(lat, lon) result = postcodes.select("postcode") with tempfile.TemporaryDirectory() as tmp: for label, col_name, wcs_base, coverage_id, wcs_version in NOISE_SOURCES: tile_dir = Path(tmp) / label.lower() tile_dir.mkdir() tile_paths = download_raster( tile_dir, wcs_base, coverage_id, label, wcs_version ) if not tile_paths: print( f"[{label}] WARNING: No tiles downloaded — column will be all null" ) series = pl.Series(col_name, [None] * len(lat), dtype=pl.Float32) else: series = sample_noise_at_postcodes( tile_paths, easting, northing, label, col_name ) result = result.with_columns(series) result.write_parquet(args.output, compression="zstd") size_mb = args.output.stat().st_size / (1024 * 1024) print(f"Wrote {args.output} ({size_mb:.1f} MB)") if __name__ == "__main__": main()