173 lines
6.1 KiB
Python
173 lines
6.1 KiB
Python
import io
|
|
import zipfile
|
|
|
|
from pipeline.download import satellite_highres
|
|
from pipeline.download.satellite_highres import (
|
|
VapTile,
|
|
parse_search_results,
|
|
select_best_rgb_tiles,
|
|
)
|
|
|
|
|
|
def _result(product: str, year: str, resolution: str, tile: str) -> dict:
    """Build one search-API record in the real response shape.

    Each of the four facets becomes an ``{"id", "label"}`` mapping; the
    record-level ``label`` and ``uri`` are derived from the same four ids.
    """

    def facet(identifier: str, label: str) -> dict:
        # Every facet in the record has the same two-key layout.
        return {"id": identifier, "label": label}

    survey_root = "https://environment.data.gov.uk/tiles/collections/survey/"
    return {
        "product": facet(product, product),
        "year": facet(year, year),
        # Only the resolution label carries a unit suffix.
        "resolution": facet(resolution, f"{resolution}m"),
        "tile": facet(tile, tile),
        "label": f"{product}-{year}-{resolution}m-{tile}",
        "uri": f"{survey_root}{product}/{year}/{resolution}/{tile}",
    }
|
|
|
|
|
|
# Mirrors a real Greater-London response.  RGB captures in the fixture:
#   TQ2575 - 0.4m (2008) only
#   TQ3080 - 0.1m (2008) and 0.25m (2011)
# The Night Time, IRRGB and LIDAR records are non-RGB products that must be
# ignored by RGB selection.
SAMPLE_PAYLOAD = {
    "count": 6,
    "results": [
        _result("vertical_aerial_photography_tiles_rgb", "2008", "0.4", "TQ2575"),
        _result("vertical_aerial_photography_tiles_night_time", "2012", "0.2", "TQ2575"),
        _result("lidar_composite_dtm", "2022", "1", "TQ2575"),
        # TQ3080 has two RGB captures: a finer-but-older and a coarser-but-newer.
        _result("vertical_aerial_photography_tiles_rgb", "2008", "0.1", "TQ3080"),
        _result("vertical_aerial_photography_tiles_rgb", "2011", "0.25", "TQ3080"),
        _result("vertical_aerial_photography_tiles_irrgb", "2012", "0.5", "TQ3080"),
    ],
}
|
|
|
|
|
|
def test_parse_search_results_skips_malformed_records() -> None:
    """A record missing required fields is dropped; the valid one is parsed."""
    well_formed = _result(
        "vertical_aerial_photography_tiles_rgb", "2008", "0.4", "TQ2575"
    )
    # Carries only a product id: no year/resolution/tile/uri.
    malformed = {"product": {"id": "broken"}}
    expected = VapTile(
        product_id="vertical_aerial_photography_tiles_rgb",
        year=2008,
        resolution_m=0.4,
        os_tile_id="TQ2575",
        uri="https://environment.data.gov.uk/tiles/collections/survey/"
        "vertical_aerial_photography_tiles_rgb/2008/0.4/TQ2575",
        label="vertical_aerial_photography_tiles_rgb-2008-0.4m-TQ2575",
    )

    tiles = parse_search_results({"results": [well_formed, malformed]})

    assert len(tiles) == 1
    assert tiles[0] == expected
|
|
|
|
|
|
def test_select_best_rgb_filters_non_rgb_products() -> None:
    """Every selected tile carries the RGB product id, and nothing else."""
    chosen = select_best_rgb_tiles(parse_search_results(SAMPLE_PAYLOAD))
    product_ids = {tile.product_id for tile in chosen}
    # Comparing against a one-element set also fails when nothing is selected.
    assert product_ids == {satellite_highres.VAP_RGB_PRODUCT}
|
|
|
|
|
|
def test_select_best_rgb_one_tile_per_os_square() -> None:
    """Selection yields exactly one tile for each OS square in the fixture."""
    chosen = select_best_rgb_tiles(parse_search_results(SAMPLE_PAYLOAD))
    # Sorting makes the comparison independent of the order tiles come back in.
    square_ids = sorted(tile.os_tile_id for tile in chosen)
    assert square_ids == ["TQ2575", "TQ3080"]
|
|
|
|
|
|
def test_select_best_rgb_prefers_finest_resolution_then_latest_year() -> None:
    """Resolution outranks recency when choosing a square's RGB tile."""
    chosen = select_best_rgb_tiles(parse_search_results(SAMPLE_PAYLOAD))
    by_square = {tile.os_tile_id: tile for tile in chosen}

    # TQ2575: only one RGB capture.
    assert by_square["TQ2575"].resolution_m == 0.4

    # TQ3080: finest resolution (0.1m) wins even though it is the older survey.
    winner = by_square["TQ3080"]
    assert winner.resolution_m == 0.1
    assert winner.year == 2008
|
|
|
|
|
|
def test_select_best_rgb_breaks_resolution_ties_by_year() -> None:
    """With equal resolutions on one square, the latest survey year wins."""
    # Same product, resolution and square; only year and label differ.  The
    # newest year sits in the middle so input position cannot decide the result.
    candidates = [
        VapTile(satellite_highres.VAP_RGB_PRODUCT, year, 0.25, "TQ0101", "u", label)
        for year, label in ((2009, "a"), (2018, "b"), (2015, "c"))
    ]

    selected = select_best_rgb_tiles(candidates)

    assert len(selected) == 1
    assert selected[0].year == 2018
|
|
|
|
|
|
def test_select_best_rgb_empty_when_no_rgb() -> None:
    """A payload holding only a LIDAR record selects no tiles."""
    lidar_only = {"results": [_result("lidar_composite_dtm", "2022", "1", "TQ2575")]}
    tiles = parse_search_results(lidar_only)
    assert select_best_rgb_tiles(tiles) == []
|
|
|
|
|
|
# Shared RGB tile handed to ``_download_and_extract`` by the download tests
# below.  The URI is a placeholder: those tests replace ``urlopen`` with a fake.
_TILE = VapTile(
    product_id=satellite_highres.VAP_RGB_PRODUCT,
    year=2008,
    resolution_m=0.4,
    os_tile_id="TQ2575",
    uri="https://example.invalid/tile",
    label="t",
)
|
|
|
|
|
|
def _zip_with_ecw() -> bytes:
    """Return the bytes of a minimal valid survey zip carrying one ECW member.

    The archive also holds a ``readme.txt`` member alongside the ``.ecw`` file.
    """
    members = (
        ("TQ2575.ecw", b"ecw-pixels"),
        ("readme.txt", b"ignored"),
    )
    sink = io.BytesIO()
    with zipfile.ZipFile(sink, "w") as archive:
        for member_name, content in members:
            archive.writestr(member_name, content)
    # Read the buffer only after the archive is closed, so the zip's central
    # directory has been written.
    return sink.getvalue()
|
|
|
|
|
|
def _fake_urlopen(bodies: list[bytes]):
    """Build a urlopen stand-in that serves each of *bodies* in turn.

    Every call to the returned callable wraps the next body in a fresh
    ``io.BytesIO``.  The input is copied, so the caller's list is not
    modified.  Calling the stand-in more often than there are bodies raises
    ``IndexError``.
    """
    # Reversed private copy: popping from the end hands bodies out first-to-last.
    remaining = list(bodies)[::-1]

    def opener(_request, timeout=None):  # noqa: ANN001 - matches urlopen signature
        return io.BytesIO(remaining.pop())

    return opener
|
|
|
|
|
|
def test_download_extracts_ecw_from_valid_zip(tmp_path, monkeypatch) -> None:
    """A valid zip response yields the extracted ECW file and no leftover zip."""
    responder = _fake_urlopen([_zip_with_ecw()])
    monkeypatch.setattr(satellite_highres.urllib.request, "urlopen", responder)

    extracted = satellite_highres._download_and_extract(
        _TILE, tmp_path, key="k", timeout=1.0, retries=2
    )

    names = [path.name for path in extracted]
    assert names == ["TQ2575_TQ2575.ecw"]
    assert extracted[0].read_bytes() == b"ecw-pixels"
    # The transient zip is cleaned up regardless.
    leftover_zip = tmp_path / "TQ2575.zip"
    assert not leftover_zip.exists()
|
|
|
|
|
|
def test_download_retries_past_a_corrupt_body(tmp_path, monkeypatch) -> None:
    """A non-zip first response is retried and the second, valid zip is used."""
    # First response is a non-zip error page; the retry serves a real zip.
    responses = [b"<html>rate limited</html>", _zip_with_ecw()]
    monkeypatch.setattr(
        satellite_highres.urllib.request, "urlopen", _fake_urlopen(responses)
    )

    extracted = satellite_highres._download_and_extract(
        _TILE, tmp_path, key="k", timeout=1.0, retries=2
    )

    names = [path.name for path in extracted]
    assert names == ["TQ2575_TQ2575.ecw"]
|
|
|
|
|
|
def test_download_skips_tile_when_all_attempts_are_corrupt(
    tmp_path, monkeypatch
) -> None:
    """When no attempt returns a zip, the result is empty and nothing is left on disk."""
    # Every attempt returns a non-zip body: the tile is skipped, not raised.
    corrupt_bodies = [b"not a zip"] * 3
    monkeypatch.setattr(
        satellite_highres.urllib.request, "urlopen", _fake_urlopen(corrupt_bodies)
    )

    extracted = satellite_highres._download_and_extract(
        _TILE, tmp_path, key="k", timeout=1.0, retries=2
    )

    assert extracted == []
    leftovers = list(tmp_path.glob("*"))
    assert leftovers == []  # no leftover zip or partial ecw
|