"""Tests for the INSPIRE download helpers in ``pipeline.download.inspire``."""

from zipfile import ZipFile

from pipeline.download import inspire

# Base of every download URL the tests below expect or pass in.
_DOWNLOAD_BASE = (
    "https://use-land-property-data.service.gov.uk/datasets/inspire/download"
)
_ADUR_NAME = "Adur_District_Council.zip"
_ADUR_URL = f"{_DOWNLOAD_BASE}/{_ADUR_NAME}"
_BARNSLEY_URL = f"{_DOWNLOAD_BASE}/Barnsley_Metropolitan_Borough_Council.zip"


def _write_zip(path):
    """Create a minimal ZIP archive at *path* holding one empty member."""
    with ZipFile(path, "w") as archive:
        archive.writestr("example.gml", "")


def test_parse_zip_urls_finds_relative_and_absolute_links():
    """``parse_zip_urls`` returns the two expected INSPIRE download URLs."""
    # NOTE(review): the fixture previously held only the link labels, with no
    # anchor markup, so no URL could be extracted from it. The hrefs below are
    # reconstructed from the labels and the expected output -- confirm them
    # against what ``parse_zip_urls`` actually accepts.
    html = f"""
    <a href="/datasets/inspire/download/{_ADUR_NAME}">Download</a>
    <a href="{_ADUR_URL}">Duplicate</a>
    <a href="{_BARNSLEY_URL}?download=1">Query suffix</a>
    <a href="/datasets/ccod/download/CCOD_FULL.zip">Wrong dataset</a>
    <a href="https://example.com/datasets/inspire/download/Carlisle_City_Council.zip">Wrong host</a>
    """

    urls = inspire.parse_zip_urls(html)

    assert urls == [_ADUR_URL, _BARNSLEY_URL]


def test_download_one_skips_existing_valid_zip(monkeypatch, tmp_path):
    """A valid ZIP already at the destination is reported as skipped."""
    dest = tmp_path / _ADUR_NAME
    _write_zip(dest)

    def fail_download(*args, **kwargs):
        # Any call here means ``download_one`` tried to fetch the file again.
        raise AssertionError("download should not run")

    monkeypatch.setattr(inspire, "_stream_download", fail_download)

    result = inspire.download_one(_ADUR_URL, tmp_path)

    assert result == "Adur_District_Council.zip (skipped, valid ZIP exists)"


def test_download_one_replaces_invalid_existing_file(monkeypatch, tmp_path):
    """A non-ZIP file at the destination is replaced by the downloaded ZIP."""
    dest = tmp_path / _ADUR_NAME
    dest.write_text("not a zip")

    def fake_download(url, output_path, *, timeout):
        # Stand-in for the network fetch: write a real archive where asked.
        _write_zip(output_path)

    monkeypatch.setattr(inspire, "_stream_download", fake_download)

    result = inspire.download_one(_ADUR_URL, tmp_path)

    assert result == "Adur_District_Council.zip"
    assert inspire._is_valid_zip(dest)
    # No "<name>.zip.tmp" file may be left behind next to the final archive.
    assert not (tmp_path / "Adur_District_Council.zip.tmp").exists()