"""Tests that drive ``pipeline.validate_outputs.main`` through a patched ``sys.argv``."""

from __future__ import annotations

import zipfile

import polars as pl

from pipeline.validate_outputs import main


def test_validates_parquet_file_and_zip(tmp_path, monkeypatch):
    """``main()`` returns 0 when every path and glob given on the command line is populated."""
    # Fixture files: a one-row parquet, a non-empty text file, and a zip
    # holding a single member.
    parquet_target = tmp_path / "data.parquet"
    text_target = tmp_path / "plain.txt"
    zip_target = tmp_path / "archive.zip"

    pl.DataFrame({"value": [1]}).write_parquet(parquet_target)
    text_target.write_text("ok\n")
    with zipfile.ZipFile(zip_target, "w") as bundle:
        bundle.writestr("data.txt", "ok\n")

    # The glob options are passed as "<directory>::<pattern>".
    argv = [
        "validate_outputs",
        "--parquet",
        str(parquet_target),
        "--file",
        str(text_target),
        "--zip",
        str(zip_target),
        "--glob",
        f"{tmp_path}::*.txt",
        "--zip-glob",
        f"{tmp_path}::*.zip",
    ]
    monkeypatch.setattr("sys.argv", argv)

    exit_code = main()
    assert exit_code == 0


def test_rejects_missing_and_empty_outputs(tmp_path, monkeypatch, capsys):
    """``main()`` returns 1 and reports each problem on stderr for bad outputs."""
    # A zero-byte file, a parquet path that is never created, and a glob
    # pattern that matches nothing in tmp_path.
    blank_file = tmp_path / "empty.txt"
    blank_file.touch()

    argv = [
        "validate_outputs",
        "--file",
        str(blank_file),
        "--parquet",
        str(tmp_path / "missing.parquet"),
        "--glob",
        f"{tmp_path}::*.csv",
    ]
    monkeypatch.setattr("sys.argv", argv)

    exit_code = main()
    assert exit_code == 1

    captured_err = capsys.readouterr().err
    for expected_fragment in ("empty file", "missing", "no files matched"):
        assert expected_fragment in captured_err