Fix flake
This commit is contained in:
parent
df63764a9f
commit
3e307c2fff
2 changed files with 106 additions and 5 deletions
|
|
@ -183,9 +183,19 @@ def search_vap_tiles(aoi: dict, timeout: float = 60.0) -> list[VapTile]:
|
|||
def _download_and_extract(
|
||||
tile: VapTile, ecw_dir: Path, key: str, timeout: float, retries: int
|
||||
) -> list[Path]:
|
||||
"""Download one survey zip and extract its ECW raster(s)."""
|
||||
"""Download one survey zip and extract its ECW raster(s).
|
||||
|
||||
The Defra endpoint sometimes answers a download with a 200 whose body is an
|
||||
error/HTML page or a truncated stream rather than a ZIP (rate limiting, a
|
||||
rotated key, a transient backend hiccup). We treat a non-ZIP body the same as
|
||||
a network error -- retry it -- and, if every attempt fails, skip just this
|
||||
tile rather than aborting the whole multi-tile run: missing coverage simply
|
||||
shows the Sentinel-2 base through, and the caller still hard-fails if *no*
|
||||
ECWs were extracted at all.
|
||||
"""
|
||||
url = f"{tile.uri}?subscription-key={key}"
|
||||
zip_path = ecw_dir / f"{tile.os_tile_id}.zip"
|
||||
last_err: Exception | None = None
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
|
|
@ -193,10 +203,25 @@ def _download_and_extract(
|
|||
timeout=timeout,
|
||||
) as response, zip_path.open("wb") as out:
|
||||
shutil.copyfileobj(response, out, length=1 << 20)
|
||||
break
|
||||
except (urllib.error.URLError, TimeoutError, ConnectionError) as err:
|
||||
if attempt == retries:
|
||||
raise RuntimeError(f"Failed to download {url}: {err}") from err
|
||||
return _extract_ecws(zip_path, tile, ecw_dir)
|
||||
except (
|
||||
urllib.error.URLError,
|
||||
TimeoutError,
|
||||
ConnectionError,
|
||||
zipfile.BadZipFile,
|
||||
) as err:
|
||||
last_err = err
|
||||
zip_path.unlink(missing_ok=True)
|
||||
print(
|
||||
f" {tile.os_tile_id}: download failed after {retries + 1} attempt(s), "
|
||||
f"skipping ({last_err})",
|
||||
flush=True,
|
||||
)
|
||||
return []
|
||||
|
||||
|
||||
def _extract_ecws(zip_path: Path, tile: VapTile, ecw_dir: Path) -> list[Path]:
|
||||
"""Extract every ECW raster from a downloaded survey zip; delete the zip."""
|
||||
extracted: list[Path] = []
|
||||
with zipfile.ZipFile(zip_path) as archive:
|
||||
for member in archive.infolist():
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
import io
|
||||
import zipfile
|
||||
|
||||
from pipeline.download import satellite_highres
|
||||
from pipeline.download.satellite_highres import (
|
||||
VapTile,
|
||||
|
|
@ -95,3 +98,76 @@ def test_select_best_rgb_breaks_resolution_ties_by_year() -> None:
|
|||
def test_select_best_rgb_empty_when_no_rgb() -> None:
|
||||
payload = {"results": [_result("lidar_composite_dtm", "2022", "1", "TQ2575")]}
|
||||
assert select_best_rgb_tiles(parse_search_results(payload)) == []
|
||||
|
||||
|
||||
_TILE = VapTile(
|
||||
product_id=satellite_highres.VAP_RGB_PRODUCT,
|
||||
year=2008,
|
||||
resolution_m=0.4,
|
||||
os_tile_id="TQ2575",
|
||||
uri="https://example.invalid/tile",
|
||||
label="t",
|
||||
)
|
||||
|
||||
|
||||
def _zip_with_ecw() -> bytes:
|
||||
"""A minimal valid survey zip carrying one ECW member."""
|
||||
buffer = io.BytesIO()
|
||||
with zipfile.ZipFile(buffer, "w") as archive:
|
||||
archive.writestr("TQ2575.ecw", b"ecw-pixels")
|
||||
archive.writestr("readme.txt", b"ignored")
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def _fake_urlopen(bodies: list[bytes]):
|
||||
"""Return a urlopen stand-in that yields each body in turn as the response."""
|
||||
queue = list(bodies)
|
||||
|
||||
def opener(_request, timeout=None): # noqa: ANN001 - matches urlopen signature
|
||||
return io.BytesIO(queue.pop(0))
|
||||
|
||||
return opener
|
||||
|
||||
|
||||
def test_download_extracts_ecw_from_valid_zip(tmp_path, monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
satellite_highres.urllib.request,
|
||||
"urlopen",
|
||||
_fake_urlopen([_zip_with_ecw()]),
|
||||
)
|
||||
paths = satellite_highres._download_and_extract(
|
||||
_TILE, tmp_path, key="k", timeout=1.0, retries=2
|
||||
)
|
||||
assert [p.name for p in paths] == ["TQ2575_TQ2575.ecw"]
|
||||
assert paths[0].read_bytes() == b"ecw-pixels"
|
||||
# The transient zip is cleaned up regardless.
|
||||
assert not (tmp_path / "TQ2575.zip").exists()
|
||||
|
||||
|
||||
def test_download_retries_past_a_corrupt_body(tmp_path, monkeypatch) -> None:
|
||||
# First response is a non-zip error page; the retry serves a real zip.
|
||||
monkeypatch.setattr(
|
||||
satellite_highres.urllib.request,
|
||||
"urlopen",
|
||||
_fake_urlopen([b"<html>rate limited</html>", _zip_with_ecw()]),
|
||||
)
|
||||
paths = satellite_highres._download_and_extract(
|
||||
_TILE, tmp_path, key="k", timeout=1.0, retries=2
|
||||
)
|
||||
assert [p.name for p in paths] == ["TQ2575_TQ2575.ecw"]
|
||||
|
||||
|
||||
def test_download_skips_tile_when_all_attempts_are_corrupt(
|
||||
tmp_path, monkeypatch
|
||||
) -> None:
|
||||
# Every attempt returns a non-zip body: the tile is skipped, not raised.
|
||||
monkeypatch.setattr(
|
||||
satellite_highres.urllib.request,
|
||||
"urlopen",
|
||||
_fake_urlopen([b"not a zip"] * 3),
|
||||
)
|
||||
paths = satellite_highres._download_and_extract(
|
||||
_TILE, tmp_path, key="k", timeout=1.0, retries=2
|
||||
)
|
||||
assert paths == []
|
||||
assert list(tmp_path.glob("*")) == [] # no leftover zip or partial ecw
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue