from zipfile import ZipFile
from pipeline.download import inspire
def _write_zip(path):
    """Create a minimal valid ZIP archive at *path*.

    The archive holds a single empty member named ``example.gml``, which is
    enough for the file to be recognised as a well-formed ZIP.

    Args:
        path: Destination of the archive; anything ``ZipFile`` accepts as a
            file name (``str`` or path-like). An existing file is overwritten.
    """
    # Mode "w" truncates any existing file, so callers may reuse a path.
    with ZipFile(path, "w") as archive:
        archive.writestr("example.gml", "")
def test_parse_zip_urls_finds_relative_and_absolute_links():
    """Check the URL list ``parse_zip_urls`` extracts from a download page.

    The page offers five links; the assertion expects exactly two absolute
    INSPIRE download URLs back, in page order.
    """
    # NOTE(review): the fixture previously contained only the link labels
    # (no anchors/hrefs), so no URL could be extracted from it. The hrefs
    # below are reconstructed from the labels, the test name and the expected
    # result -- confirm them against the behaviour of ``parse_zip_urls``.
    html = """
    <a href="/datasets/inspire/download/Adur_District_Council.zip">Download</a>
    <a href="/datasets/inspire/download/Adur_District_Council.zip">Duplicate</a>
    <a href="https://use-land-property-data.service.gov.uk/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip?download=1">Query suffix</a>
    <a href="/datasets/other/download/Other_Council.zip">Wrong dataset</a>
    <a href="https://example.com/datasets/inspire/download/Other_Council.zip">Wrong host</a>
    """

    urls = inspire.parse_zip_urls(html)

    assert urls == [
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip",
    ]
def test_download_one_skips_existing_valid_zip(monkeypatch, tmp_path):
    """``download_one`` must not download when a valid ZIP already exists.

    A valid archive is placed at the destination up front and the streaming
    helper is replaced with a stub that fails the test if it is ever called.
    """
    dest = tmp_path / "Adur_District_Council.zip"
    _write_zip(dest)

    def fail_download(*args, **kwargs):
        # Any call means the skip logic did not trigger.
        raise AssertionError("download should not run")

    monkeypatch.setattr(inspire, "_stream_download", fail_download)

    result = inspire.download_one(
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        tmp_path,
    )

    assert result == "Adur_District_Council.zip (skipped, valid ZIP exists)"
def test_download_one_replaces_invalid_existing_file(monkeypatch, tmp_path):
    """``download_one`` must replace an existing file that is not a valid ZIP.

    The destination starts out as plain text. The streaming helper is stubbed
    to write a real archive to whatever path it is handed, and the test then
    checks the destination is a valid ZIP and no ``.tmp`` file is left behind.
    """
    dest = tmp_path / "Adur_District_Council.zip"
    dest.write_text("not a zip")

    def fake_download(url, output_path, *, timeout):
        # Stand-in for the network transfer: produce a valid archive locally.
        _write_zip(output_path)

    monkeypatch.setattr(inspire, "_stream_download", fake_download)

    result = inspire.download_one(
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        tmp_path,
    )

    assert result == "Adur_District_Council.zip"
    assert inspire._is_valid_zip(dest)
    # The temporary download file must not survive a successful replacement.
    assert not (tmp_path / "Adur_District_Council.zip.tmp").exists()