perfect-postcode/pipeline/transform/test_transform_poi.py
Andras Schmelczer 6cc7288126
Some checks failed
CI / Check (push) Has been cancelled
Build and publish Docker image / build-and-push (push) Has been cancelled
All good
2026-05-18 21:20:10 +01:00

179 lines
6.1 KiB
Python

import polars as pl
from pipeline.transform.transform_poi import transform_grocery_retail_points
def test_transform_grocery_retail_points_outputs_chain_categories():
    """Check the full POI record produced for three known grocery chains.

    Each input row is expected to come back with a ``glx-`` prefixed string
    id, the store name, a chain-level ``category``, an ``icon_category``,
    the ``Groceries`` group and the shopping-trolley emoji.
    """
    source = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": ["Waitrose", "Sainsburys", "The Co-operative Group"],
            "fascia": ["Waitrose", "Sainsbury's Local", "Co-op Food"],
            # The doubled apostrophe is intentional input: the expected name
            # below carries a single apostrophe.
            "store_name": ["Waitrose Test", "Sainsbury''s Test", "Co-op Test"],
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )

    result = transform_grocery_retail_points(source, min_chain_locations=1)

    # Only the first four fields vary per row; group and emoji are constant.
    expected = [
        {
            "id": poi_id,
            "name": name,
            "category": category,
            "icon_category": icon_category,
            "group": "Groceries",
            "emoji": "🛒",
        }
        for poi_id, name, category, icon_category in (
            ("glx-101", "Waitrose Test", "Waitrose", "Waitrose"),
            ("glx-102", "Sainsbury's Test", "Sainsbury's", "Sainsbury's Local"),
            ("glx-103", "Co-op Test", "Co-op", "Co-op"),
        )
    ]
    columns = ["id", "name", "category", "icon_category", "group", "emoji"]
    assert result.select(*columns).to_dicts() == expected
def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check that ``icon_category`` keeps the fascia while ``category`` is the chain.

    Two of the input fascias carry a trailing forecourt brand ("Esso",
    "Shell"); the expected ``icon_category`` values omit that suffix.
    """
    retailers = ["Tesco", "Iceland", "Waitrose", "Morrisons"]
    fascias = [
        "Tesco Express Esso",
        "The Food Warehouse",
        "Little Waitrose Shell",
        "Morrisons Daily",
    ]
    store_names = [
        "Tesco Test Express",
        "Iceland Test Food Warehouse",
        "Little Waitrose Test",
        "Morrisons Daily Test",
    ]
    source = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": retailers,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )

    result = transform_grocery_retail_points(source, min_chain_locations=1)

    expected = [
        {"category": category, "icon_category": icon_category}
        for category, icon_category in (
            ("Tesco", "Tesco Express"),
            ("Iceland", "The Food Warehouse"),
            ("Waitrose", "Little Waitrose"),
            ("Morrisons", "Morrisons Daily"),
        )
    ]
    assert result.select("category", "icon_category").to_dicts() == expected
def test_transform_grocery_retail_points_merges_cooperative_societies():
    """Check that regional co-operative societies all map to ``Co-op``.

    The three rows use different society names as retailer, and the fascia
    is respectively a society name, a generic co-operative label and
    missing (``None``); every row is expected to end up with both
    ``category`` and ``icon_category`` equal to ``Co-op``.
    """
    societies = [
        "Central England Co-operative",
        "Lincolnshire Co-operative",
        "The Southern Co-operative",
    ]
    fascias = [
        "Central England Co-operative",
        "The Co-operative Food",
        None,
    ]
    store_names = [
        "Central Co-op Test",
        "Lincolnshire Co-op Test",
        "Southern Co-op Test",
    ]
    source = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": societies,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )

    result = transform_grocery_retail_points(source, min_chain_locations=1)

    # One identical record per input row.
    expected = [{"category": "Co-op", "icon_category": "Co-op"} for _ in range(3)]
    assert result.select("category", "icon_category").to_dicts() == expected
def test_transform_grocery_retail_points_accepts_base_fascias():
    """Check chains whose fascia is the plain brand or a simple store format.

    For these rows the expected ``icon_category`` is the input fascia
    unchanged and the expected ``category`` is the retailer name.
    """
    retailers = ["Aldi", "Asda", "Booths", "Whole Foods Market"]
    fascias = ["Aldi", "Asda Superstore", "Booths", "Whole Foods Market"]
    store_names = [
        "Aldi Test",
        "Asda Test Superstore",
        "Booths Test",
        "Whole Foods Test",
    ]
    source = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": retailers,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )

    result = transform_grocery_retail_points(source, min_chain_locations=1)

    expected = [
        {"category": category, "icon_category": icon_category}
        for category, icon_category in (
            ("Aldi", "Aldi"),
            ("Asda", "Asda Superstore"),
            ("Booths", "Booths"),
            ("Whole Foods Market", "Whole Foods Market"),
        )
    ]
    assert result.select("category", "icon_category").to_dicts() == expected
def test_transform_grocery_retail_points_drops_invalid_rows():
    """Check that a row with an empty retailer is absent from the output.

    The second input row has ``retailer == ""`` (with an otherwise
    well-formed fascia and store name); only the Waitrose row is expected
    to remain.
    """
    columns = {
        "id": [101, 102],
        "retailer": ["Waitrose", ""],
        "fascia": ["Waitrose", "Tesco"],
        "store_name": ["Waitrose Test", "Tesco Test"],
        "long_wgs": [-0.141, -0.142],
        "lat_wgs": [51.515, 51.516],
    }
    source = pl.DataFrame(columns)

    result = transform_grocery_retail_points(source, min_chain_locations=1)

    surviving_categories = result["category"].to_list()
    assert surviving_categories == ["Waitrose"]
def test_transform_grocery_retail_points_includes_unmapped_chains_with_five_locations():
    """Check the default location threshold for retailers this test treats as unmapped.

    The input has five "Tian Tian" stores and four "Corner Shop" stores and
    the transform is called without ``min_chain_locations``. Only the five
    "Tian Tian" rows are expected back, with the retailer name used for
    both ``category`` and ``icon_category``.
    """
    store_ids = list(range(1, 10))
    row_count = len(store_ids)
    source = pl.DataFrame(
        {
            "id": store_ids,
            "retailer": ["Tian Tian"] * 5 + ["Corner Shop"] * 4,
            "fascia": ["Tian Tian Market"] * 5 + ["Corner Shop"] * 4,
            "store_name": [f"Store {store_id}" for store_id in store_ids],
            "long_wgs": [-0.1] * row_count,
            "lat_wgs": [51.5] * row_count,
        }
    )

    # No min_chain_locations argument: the function's default applies.
    result = transform_grocery_retail_points(source)

    # Ids 1-5 are the "Tian Tian" rows.
    expected = [
        {
            "id": f"glx-{store_id}",
            "category": "Tian Tian",
            "icon_category": "Tian Tian",
        }
        for store_id in range(1, 6)
    ]
    assert result.select("id", "category", "icon_category").to_dicts() == expected