"""Tests for ``transform_grocery_retail_points``."""

import polars as pl

from pipeline.transform.transform_poi import transform_grocery_retail_points


def test_transform_grocery_retail_points_outputs_chain_categories():
    """Check the full set of output fields for three known retailers.

    Each row is expected to get a ``glx-``-prefixed string id, the
    "Groceries" group and the trolley emoji, alongside its category and
    icon category.
    """
    source = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": ["Waitrose", "Sainsburys", "The Co-operative Group"],
            "fascia": ["Waitrose", "Sainsbury's Local", "Co-op Food"],
            # The doubled apostrophe is expected to come out as a single one.
            "store_name": ["Waitrose Test", "Sainsbury''s Test", "Co-op Test"],
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )
    expected = [
        {
            "id": "glx-101",
            "name": "Waitrose Test",
            "category": "Waitrose",
            "icon_category": "Waitrose",
            "group": "Groceries",
            "emoji": "🛒",
        },
        {
            "id": "glx-102",
            "name": "Sainsbury's Test",
            "category": "Sainsbury's",
            "icon_category": "Sainsbury's Local",
            "group": "Groceries",
            "emoji": "🛒",
        },
        {
            "id": "glx-103",
            "name": "Co-op Test",
            "category": "Co-op",
            "icon_category": "Co-op",
            "group": "Groceries",
            "emoji": "🛒",
        },
    ]

    result = transform_grocery_retail_points(source, min_chain_locations=1)
    actual = result.select(
        "id", "name", "category", "icon_category", "group", "emoji"
    ).to_dicts()

    assert actual == expected


def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check that the icon category follows the fascia, not the retailer.

    The expected icon categories also omit the trailing forecourt brand
    present in two of the input fascias ("Esso", "Shell").
    """
    source = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Tesco", "Iceland", "Waitrose", "Morrisons"],
            "fascia": [
                "Tesco Express Esso",
                "The Food Warehouse",
                "Little Waitrose Shell",
                "Morrisons Daily",
            ],
            "store_name": [
                "Tesco Test Express",
                "Iceland Test Food Warehouse",
                "Little Waitrose Test",
                "Morrisons Daily Test",
            ],
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )
    expected = [
        {"category": "Tesco", "icon_category": "Tesco Express"},
        {"category": "Iceland", "icon_category": "The Food Warehouse"},
        {"category": "Waitrose", "icon_category": "Little Waitrose"},
        {"category": "Morrisons", "icon_category": "Morrisons Daily"},
    ]

    result = transform_grocery_retail_points(source, min_chain_locations=1)
    actual = result.select("category", "icon_category").to_dicts()

    assert actual == expected


def test_transform_grocery_retail_points_merges_cooperative_societies():
    """Check that regional co-operative societies all map to "Co-op".

    Covers a society-named fascia, a generic co-operative fascia, and a
    missing (``None``) fascia.
    """
    source = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": [
                "Central England Co-operative",
                "Lincolnshire Co-operative",
                "The Southern Co-operative",
            ],
            "fascia": [
                "Central England Co-operative",
                "The Co-operative Food",
                None,
            ],
            "store_name": [
                "Central Co-op Test",
                "Lincolnshire Co-op Test",
                "Southern Co-op Test",
            ],
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )
    expected = [
        {"category": "Co-op", "icon_category": "Co-op"},
        {"category": "Co-op", "icon_category": "Co-op"},
        {"category": "Co-op", "icon_category": "Co-op"},
    ]

    result = transform_grocery_retail_points(source, min_chain_locations=1)
    actual = result.select("category", "icon_category").to_dicts()

    assert actual == expected


def test_transform_grocery_retail_points_accepts_base_fascias():
    """Check that plain chain fascias pass through as category and icon."""
    source = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Aldi", "Asda", "Booths", "Whole Foods Market"],
            "fascia": ["Aldi", "Asda Superstore", "Booths", "Whole Foods Market"],
            "store_name": [
                "Aldi Test",
                "Asda Test Superstore",
                "Booths Test",
                "Whole Foods Test",
            ],
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )
    expected = [
        {"category": "Aldi", "icon_category": "Aldi"},
        {"category": "Asda", "icon_category": "Asda Superstore"},
        {"category": "Booths", "icon_category": "Booths"},
        {"category": "Whole Foods Market", "icon_category": "Whole Foods Market"},
    ]

    result = transform_grocery_retail_points(source, min_chain_locations=1)
    actual = result.select("category", "icon_category").to_dicts()

    assert actual == expected


def test_transform_grocery_retail_points_drops_invalid_rows():
    """Check that a row with an empty retailer is absent from the output."""
    source = pl.DataFrame(
        {
            "id": [101, 102],
            # The second row has an empty retailer and should not survive.
            "retailer": ["Waitrose", ""],
            "fascia": ["Waitrose", "Tesco"],
            "store_name": ["Waitrose Test", "Tesco Test"],
            "long_wgs": [-0.141, -0.142],
            "lat_wgs": [51.515, 51.516],
        }
    )

    result = transform_grocery_retail_points(source, min_chain_locations=1)
    categories = result.get_column("category").to_list()

    assert categories == ["Waitrose"]


def test_transform_grocery_retail_points_includes_unmapped_chains_with_five_locations():
    """Check the location threshold when ``min_chain_locations`` is omitted.

    The five-row "Tian Tian" chain is expected in the output; the four-row
    "Corner Shop" chain is not.
    """
    source = pl.DataFrame(
        {
            "id": list(range(1, 10)),
            "retailer": ["Tian Tian"] * 5 + ["Corner Shop"] * 4,
            "fascia": ["Tian Tian Market"] * 5 + ["Corner Shop"] * 4,
            "store_name": [f"Store {number}" for number in range(1, 10)],
            "long_wgs": [-0.1] * 9,
            "lat_wgs": [51.5] * 9,
        }
    )
    # Only ids 1-5 (the "Tian Tian" rows) are expected back.
    expected = [
        {
            "id": f"glx-{number}",
            "category": "Tian Tian",
            "icon_category": "Tian Tian",
        }
        for number in range(1, 6)
    ]

    result = transform_grocery_retail_points(source)
    actual = result.select("id", "category", "icon_category").to_dicts()

    assert actual == expected