"""Tests for ``transform_grocery_retail_points``.

Each test feeds a small hand-built raw frame through the transform and
compares selected output columns against literal expected values.
"""

import polars as pl

from pipeline.transform.transform_poi import transform_grocery_retail_points


def test_transform_grocery_retail_points_outputs_chain_categories():
    """Check the id, name, category, icon, group and emoji columns per row."""
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": ["Waitrose", "Sainsburys", "The Co-operative Group"],
            "fascia": ["Waitrose", "Sainsbury's Local", "Co-op Food"],
            # The second store name carries a doubled apostrophe; the
            # expectation below wants it back as a single apostrophe.
            "store_name": ["Waitrose Test", "Sainsbury''s Test", "Co-op Test"],
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )
    expected_rows = [
        {
            "id": "glx-101",
            "name": "Waitrose Test",
            "category": "Waitrose",
            "icon_category": "Waitrose",
            "group": "Groceries",
            "emoji": "🛒",
        },
        {
            "id": "glx-102",
            "name": "Sainsbury's Test",
            "category": "Sainsbury's",
            "icon_category": "Sainsbury's Local",
            "group": "Groceries",
            "emoji": "🛒",
        },
        {
            "id": "glx-103",
            "name": "Co-op Test",
            "category": "Co-op",
            "icon_category": "Co-op",
            "group": "Groceries",
            "emoji": "🛒",
        },
    ]

    result = transform_grocery_retail_points(source_frame)

    projected = result.select(
        "id", "name", "category", "icon_category", "group", "emoji"
    )
    assert projected.to_dicts() == expected_rows


def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check that sub-brand fascias give a chain category and a fascia icon."""
    fascias = [
        "Tesco Express Esso",
        "The Food Warehouse",
        "Little Waitrose Shell",
        "Morrisons Daily",
    ]
    store_names = [
        "Tesco Test Express",
        "Iceland Test Food Warehouse",
        "Little Waitrose Test",
        "Morrisons Daily Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Tesco", "Iceland", "Waitrose", "Morrisons"],
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )
    # Expected icon categories omit the "Esso" / "Shell" suffixes that are
    # present on the input fascias.
    expected_pairs = [
        {"category": "Tesco", "icon_category": "Tesco Express"},
        {"category": "Iceland", "icon_category": "The Food Warehouse"},
        {"category": "Waitrose", "icon_category": "Little Waitrose"},
        {"category": "Morrisons", "icon_category": "Morrisons Daily"},
    ]

    result = transform_grocery_retail_points(source_frame)

    assert result.select("category", "icon_category").to_dicts() == expected_pairs


def test_transform_grocery_retail_points_accepts_base_fascias():
    """Check that plain chain fascias come through with the expected pairs."""
    store_names = [
        "Aldi Test",
        "Asda Test Superstore",
        "Booths Test",
        "Whole Foods Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Aldi", "Asda", "Booths", "Whole Foods Market"],
            "fascia": ["Aldi", "Asda Superstore", "Booths", "Whole Foods Market"],
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )
    expected_pairs = [
        {"category": "Aldi", "icon_category": "Aldi"},
        {"category": "Asda", "icon_category": "Asda Superstore"},
        {"category": "Booths", "icon_category": "Booths"},
        {"category": "Whole Foods Market", "icon_category": "Whole Foods Market"},
    ]

    result = transform_grocery_retail_points(source_frame)

    assert result.select("category", "icon_category").to_dicts() == expected_pairs


def test_transform_grocery_retail_points_drops_invalid_rows():
    """Check that the row with an empty retailer is absent from the output."""
    source_frame = pl.DataFrame(
        {
            "id": [101, 102],
            "retailer": ["Waitrose", ""],  # second row has a blank retailer
            "fascia": ["Waitrose", "Tesco"],
            "store_name": ["Waitrose Test", "Tesco Test"],
            "long_wgs": [-0.141, -0.142],
            "lat_wgs": [51.515, 51.516],
        }
    )

    result = transform_grocery_retail_points(source_frame)

    surviving_categories = result["category"].to_list()
    assert surviving_categories == ["Waitrose"]