# perfect-postcode/pipeline/download/test_inspire.py
# 2026-05-28 21:48:35 +01:00
#
# 61 lines
# 2.2 KiB
# Python

from zipfile import ZipFile
from pipeline.download import inspire
def _write_zip(path) -> None:
    """Create a minimal, valid ZIP archive at ``path``.

    The archive contains a single member, ``example.gml``, whose content is
    the text ``<gml />``. The archive is opened in ``"w"`` mode, so anything
    already stored at ``path`` is replaced.

    Args:
        path: Filesystem location (string or path-like) of the archive to
            create.
    """
    with ZipFile(path, "w") as archive:
        archive.writestr("example.gml", "<gml />")
def test_parse_zip_urls_finds_relative_and_absolute_links():
    """Check which links ``inspire.parse_zip_urls`` extracts from a page.

    The fixture mixes five anchors: a relative INSPIRE link, the same target
    as an absolute URL, a relative INSPIRE link with a query suffix, a link
    into a different dataset (``llc``) and an INSPIRE-shaped link on another
    host. The expected result is a list of two absolute URLs: the duplicate
    collapses to one entry, the ``?x=1`` suffix is dropped, and the
    wrong-dataset and wrong-host links are excluded.
    """
    # The fixture lines are kept flush-left so the literal stays unchanged.
    html = """
<a href="/datasets/inspire/download/Adur_District_Council.zip">Download</a>
<a href="https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip">Duplicate</a>
<a href="/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip?x=1">Query suffix</a>
<a href="/datasets/llc/download/Adur_District_Council.zip">Wrong dataset</a>
<a href="https://example.com/datasets/inspire/download/Fake.zip">Wrong host</a>
"""

    urls = inspire.parse_zip_urls(html)

    assert urls == [
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip",
    ]
def test_download_one_skips_existing_valid_zip(monkeypatch, tmp_path):
    """``download_one`` must not download again when a valid ZIP already exists.

    A valid archive is placed in ``tmp_path`` under the name taken from the
    URL, and ``inspire._stream_download`` is replaced with a stub that raises.
    The test then expects ``download_one`` to return the "skipped" message.
    """
    dest = tmp_path / "Adur_District_Council.zip"
    _write_zip(dest)

    def fail_download(*args, **kwargs):
        # Reaching this stub means the skip path was not taken.
        raise AssertionError("download should not run")

    monkeypatch.setattr(inspire, "_stream_download", fail_download)

    result = inspire.download_one(
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        tmp_path,
    )

    assert result == "Adur_District_Council.zip (skipped, valid ZIP exists)"
def test_download_one_replaces_invalid_existing_file(monkeypatch, tmp_path):
    """``download_one`` must replace an existing file that is not a valid ZIP.

    The destination is pre-populated with plain text, and
    ``inspire._stream_download`` is replaced with a stub that writes a valid
    archive to whatever output path it is given. Afterwards the test expects
    the plain file name as the result, a destination that passes
    ``inspire._is_valid_zip``, and no leftover ``.zip.tmp`` file.
    """
    dest = tmp_path / "Adur_District_Council.zip"
    dest.write_text("not a zip")

    def fake_download(url, output_path, *, timeout):
        # Stand-in for the network transfer: write a valid archive to the
        # path the caller asked for.
        _write_zip(output_path)

    monkeypatch.setattr(inspire, "_stream_download", fake_download)

    result = inspire.download_one(
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        tmp_path,
    )

    assert result == "Adur_District_Council.zip"
    assert inspire._is_valid_zip(dest)
    # Presumably download_one stages the download at this path; verify against
    # the implementation. The test only requires that nothing is left behind.
    assert not (tmp_path / "Adur_District_Council.zip.tmp").exists()