from zipfile import ZipFile

import polars as pl

from pipeline.download.geolytix_retail_points import (
    read_latest_csv,
    select_latest_csv_name,
)


def test_select_latest_csv_ignores_previous_versions():
    """Check that a CSV under ``Previous Versions/`` is never selected.

    The archived entry carries the highest version number in the listing,
    yet the expected pick is the newest CSV outside that folder.
    """
    archive_members = [
        "README.txt",
        "geolytix_retailpoints_v41_202602.csv",
        "geolytix_retailpoints_v43_202603.csv",
        "Previous Versions/geolytix_retailpoints_v99_209901.csv",
    ]

    chosen = select_latest_csv_name(archive_members)

    assert chosen == "geolytix_retailpoints_v43_202603.csv"


def test_read_latest_csv_validates_required_columns(tmp_path):
    """Check that ``read_latest_csv`` returns the rows of the newest CSV.

    The zip holds two members: an older CSV with an unrelated header, and a
    newer CSV serialised from the expected frame. The loaded rows must equal
    the expected frame's rows.

    NOTE(review): the test name mentions required-column validation, but the
    only assertion here is the round-trip equality -- confirm whether a
    negative case (missing columns) should be covered elsewhere.
    """
    archive_path = tmp_path / "retail_points.zip"
    expected = pl.DataFrame(
        {
            "id": [1],
            "retailer": ["Waitrose"],
            "fascia": ["Waitrose"],
            "store_name": ["Waitrose Test"],
            "postcode": ["SW1A 1AA"],
            "long_wgs": [-0.1],
            "lat_wgs": [51.5],
        }
    )

    with ZipFile(archive_path, mode="w") as archive:
        # Stale member first: its header does not match the expected frame,
        # so reading it instead of the newer one would fail the assertion.
        archive.writestr("geolytix_retailpoints_v1_202401.csv", "not,the,latest\n")
        with archive.open("geolytix_retailpoints_v2_202402.csv", mode="w") as member:
            expected.write_csv(member)

    loaded = read_latest_csv(archive_path)

    assert loaded.to_dicts() == expected.to_dicts()