61 lines
2.2 KiB
Python
61 lines
2.2 KiB
Python
from zipfile import ZipFile
|
|
|
|
from pipeline.download import inspire
|
|
|
|
|
|
def _write_zip(path):
    """Create a minimal, valid ZIP archive at ``path`` for use as a fixture.

    The archive holds exactly one member, ``example.gml``, whose content is
    the text ``<gml />``.
    """
    member_name = "example.gml"
    member_body = "<gml />"
    with ZipFile(path, mode="w") as zip_file:
        zip_file.writestr(member_name, member_body)
|
|
|
|
|
|
def test_parse_zip_urls_finds_relative_and_absolute_links():
    """Check which links ``inspire.parse_zip_urls`` extracts from a page.

    The sample page mixes a relative link, an absolute duplicate of it, a
    link carrying a query suffix, a link under a different dataset path and
    a link on a different host. The expected result is the two absolute
    INSPIRE download URLs, listed once each and without the query suffix.
    """
    page = """
    <a href="/datasets/inspire/download/Adur_District_Council.zip">Download</a>
    <a href="https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip">Duplicate</a>
    <a href="/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip?x=1">Query suffix</a>
    <a href="/datasets/llc/download/Adur_District_Council.zip">Wrong dataset</a>
    <a href="https://example.com/datasets/inspire/download/Fake.zip">Wrong host</a>
    """
    expected = [
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip",
    ]

    assert inspire.parse_zip_urls(page) == expected
|
|
|
|
|
|
def test_download_one_skips_existing_valid_zip(monkeypatch, tmp_path):
    """Check that ``download_one`` reports a skip when a valid ZIP exists.

    A valid archive named after the URL's final path segment is created in
    ``tmp_path`` first. ``inspire._stream_download`` is then replaced with a
    stub that fails the test if it is ever called.
    """
    _write_zip(tmp_path / "Adur_District_Council.zip")

    def _must_not_download(*args, **kwargs):
        raise AssertionError("download should not run")

    monkeypatch.setattr(inspire, "_stream_download", _must_not_download)

    source_url = "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip"
    outcome = inspire.download_one(source_url, tmp_path)

    assert outcome == "Adur_District_Council.zip (skipped, valid ZIP exists)"
|
|
|
|
|
|
def test_download_one_replaces_invalid_existing_file(monkeypatch, tmp_path):
    """Check that ``download_one`` replaces a file that is not a valid ZIP.

    The destination starts out as a plain-text file. The real
    ``inspire._stream_download`` is swapped for a stub that writes a valid
    archive to whatever output path it is given. Afterwards the destination
    must be a valid ZIP and no ``.zip.tmp`` file may remain in ``tmp_path``.
    """
    target = tmp_path / "Adur_District_Council.zip"
    target.write_text("not a zip")

    # Parameter names mirror the original stub, since download_one may pass
    # them by keyword.
    def _stub_download(url, output_path, *, timeout):
        _write_zip(output_path)

    monkeypatch.setattr(inspire, "_stream_download", _stub_download)

    source_url = "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip"
    outcome = inspire.download_one(source_url, tmp_path)

    assert outcome == "Adur_District_Council.zip"
    assert inspire._is_valid_zip(target)
    leftover = tmp_path / "Adur_District_Council.zip.tmp"
    assert not leftover.exists()
|