179 lines
6.1 KiB
Python
179 lines
6.1 KiB
Python
import polars as pl
|
|
|
|
from pipeline.transform.transform_poi import transform_grocery_retail_points
|
|
|
|
|
|
def test_transform_grocery_retail_points_outputs_chain_categories():
    """Check the id, name, category, icon category, group and emoji per store.

    Three stores (Waitrose, Sainsbury's, Co-op) go through the transform with
    ``min_chain_locations=1`` and the selected output columns are compared,
    row by row, with the expected dictionaries.
    """
    source_rows = {
        "id": [101, 102, 103],
        "retailer": ["Waitrose", "Sainsburys", "The Co-operative Group"],
        "fascia": ["Waitrose", "Sainsbury's Local", "Co-op Food"],
        # The input carries a doubled apostrophe while the expected name below
        # has a single one -- presumably the transform collapses it.
        "store_name": ["Waitrose Test", "Sainsbury''s Test", "Co-op Test"],
        "long_wgs": [-0.141, -0.142, -0.143],
        "lat_wgs": [51.515, 51.516, 51.517],
    }
    result = transform_grocery_retail_points(
        pl.DataFrame(source_rows), min_chain_locations=1
    )

    # Every expected row shares the same group and emoji.
    shared = {"group": "Groceries", "emoji": "🛒"}
    expected = [
        {
            "id": "glx-101",
            "name": "Waitrose Test",
            "category": "Waitrose",
            "icon_category": "Waitrose",
            **shared,
        },
        {
            "id": "glx-102",
            "name": "Sainsbury's Test",
            "category": "Sainsbury's",
            "icon_category": "Sainsbury's Local",
            **shared,
        },
        {
            "id": "glx-103",
            "name": "Co-op Test",
            "category": "Co-op",
            "icon_category": "Co-op",
            **shared,
        },
    ]

    columns = ("id", "name", "category", "icon_category", "group", "emoji")
    actual = result.select(*columns).to_dicts()
    assert actual == expected
|
|
|
|
|
|
def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check category / icon category pairs for sub-brand fascias.

    Four stores whose fascia differs from the retailer name are transformed
    with ``min_chain_locations=1``; the resulting ``category`` and
    ``icon_category`` columns are compared with the expected pairs.
    """
    fascias = [
        "Tesco Express Esso",
        "The Food Warehouse",
        "Little Waitrose Shell",
        "Morrisons Daily",
    ]
    store_names = [
        "Tesco Test Express",
        "Iceland Test Food Warehouse",
        "Little Waitrose Test",
        "Morrisons Daily Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Tesco", "Iceland", "Waitrose", "Morrisons"],
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )

    result = transform_grocery_retail_points(source_frame, min_chain_locations=1)

    expected = [
        {"category": "Tesco", "icon_category": "Tesco Express"},
        {"category": "Iceland", "icon_category": "The Food Warehouse"},
        {"category": "Waitrose", "icon_category": "Little Waitrose"},
        {"category": "Morrisons", "icon_category": "Morrisons Daily"},
    ]
    actual = result.select("category", "icon_category").to_dicts()
    assert actual == expected
|
|
|
|
|
|
def test_transform_grocery_retail_points_merges_cooperative_societies():
    """Check that three co-operative societies all come out as "Co-op".

    The input rows use different retailer names and fascias (one fascia is
    ``None``); after the transform with ``min_chain_locations=1`` every row is
    expected to have ``category`` and ``icon_category`` equal to "Co-op".
    """
    retailers = [
        "Central England Co-operative",
        "Lincolnshire Co-operative",
        "The Southern Co-operative",
    ]
    # The last store deliberately has no fascia value.
    fascias = [
        "Central England Co-operative",
        "The Co-operative Food",
        None,
    ]
    store_names = [
        "Central Co-op Test",
        "Lincolnshire Co-op Test",
        "Southern Co-op Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": retailers,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )

    result = transform_grocery_retail_points(source_frame, min_chain_locations=1)

    # One identical expected row per input store.
    expected = [{"category": "Co-op", "icon_category": "Co-op"}] * 3
    actual = result.select("category", "icon_category").to_dicts()
    assert actual == expected
|
|
|
|
|
|
def test_transform_grocery_retail_points_accepts_base_fascias():
    """Check category / icon category pairs for Aldi, Asda, Booths, Whole Foods.

    Four stores are transformed with ``min_chain_locations=1`` and the
    ``category`` and ``icon_category`` columns are compared with the expected
    pairs, in input order.
    """
    store_names = [
        "Aldi Test",
        "Asda Test Superstore",
        "Booths Test",
        "Whole Foods Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Aldi", "Asda", "Booths", "Whole Foods Market"],
            "fascia": ["Aldi", "Asda Superstore", "Booths", "Whole Foods Market"],
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )

    result = transform_grocery_retail_points(source_frame, min_chain_locations=1)

    expected = [
        {"category": "Aldi", "icon_category": "Aldi"},
        {"category": "Asda", "icon_category": "Asda Superstore"},
        {"category": "Booths", "icon_category": "Booths"},
        {"category": "Whole Foods Market", "icon_category": "Whole Foods Market"},
    ]
    actual = result.select("category", "icon_category").to_dicts()
    assert actual == expected
|
|
|
|
|
|
def test_transform_grocery_retail_points_drops_invalid_rows():
    """Check that a row with an empty retailer is absent from the output.

    Two stores are supplied, the second with ``retailer == ""``; after the
    transform with ``min_chain_locations=1`` only the Waitrose category is
    expected to remain.
    """
    source_rows = {
        "id": [101, 102],
        # The second row has an empty retailer string.
        "retailer": ["Waitrose", ""],
        "fascia": ["Waitrose", "Tesco"],
        "store_name": ["Waitrose Test", "Tesco Test"],
        "long_wgs": [-0.141, -0.142],
        "lat_wgs": [51.515, 51.516],
    }
    source_frame = pl.DataFrame(source_rows)

    result = transform_grocery_retail_points(source_frame, min_chain_locations=1)

    surviving_categories = result["category"].to_list()
    assert surviving_categories == ["Waitrose"]
|
|
|
|
|
|
def test_transform_grocery_retail_points_includes_unmapped_chains_with_five_locations():
    """Check which rows survive when the location threshold is left at default.

    The input holds five "Tian Tian" stores followed by four "Corner Shop"
    stores. The transform is called without ``min_chain_locations`` (so its
    default applies) and only the five "Tian Tian" rows are expected back.
    """
    tian_tian_count, corner_shop_count = 5, 4
    retailers = ["Tian Tian"] * tian_tian_count + ["Corner Shop"] * corner_shop_count
    fascias = (
        ["Tian Tian Market"] * tian_tian_count + ["Corner Shop"] * corner_shop_count
    )
    source_frame = pl.DataFrame(
        {
            "id": list(range(1, 10)),
            "retailer": retailers,
            "fascia": fascias,
            "store_name": [f"Store {i}" for i in range(1, 10)],
            "long_wgs": [-0.1] * 9,
            "lat_wgs": [51.5] * 9,
        }
    )

    result = transform_grocery_retail_points(source_frame)

    # All expected rows share the same category and icon category.
    tian_tian = {"category": "Tian Tian", "icon_category": "Tian Tian"}
    expected = [
        {"id": "glx-1", **tian_tian},
        {"id": "glx-2", **tian_tian},
        {"id": "glx-3", **tian_tian},
        {"id": "glx-4", **tian_tian},
        {"id": "glx-5", **tian_tian},
    ]
    actual = result.select("id", "category", "icon_category").to_dicts()
    assert actual == expected
|