perfect-postcode/pipeline/transform/test_transform_poi.py
2026-05-06 22:40:46 +01:00

96 lines
3 KiB
Python

import polars as pl
from pipeline.transform.transform_poi import transform_grocery_retail_points
def test_transform_grocery_retail_points_outputs_chain_categories():
    """Check the POI fields produced for three grocery retail rows.

    Feeds Waitrose, Sainsburys and Co-operative rows through
    ``transform_grocery_retail_points`` and compares the ``id``, ``name``,
    ``category``, ``icon_category``, ``group`` and ``emoji`` columns against
    fixed expectations. The Sainsbury's input store name carries a doubled
    apostrophe, while the expected ``name`` has a single one.
    """
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103],
            "retailer": ["Waitrose", "Sainsburys", "The Co-operative Group"],
            "fascia": ["Waitrose", "Sainsbury's Local", "Co-op Food"],
            "store_name": ["Waitrose Test", "Sainsbury''s Test", "Co-op Test"],
            "long_wgs": [-0.141, -0.142, -0.143],
            "lat_wgs": [51.515, 51.516, 51.517],
        }
    )

    # (id, name, category, icon_category) per expected output row; every row
    # shares the same group and emoji.
    expected_fields = (
        ("glx-101", "Waitrose Test", "Waitrose", "Waitrose"),
        ("glx-102", "Sainsbury's Test", "Sainsbury's", "Sainsbury's Local"),
        ("glx-103", "Co-op Test", "Co-op", "Co-op"),
    )
    expected_rows = [
        {
            "id": poi_id,
            "name": poi_name,
            "category": chain,
            "icon_category": icon,
            "group": "Groceries",
            "emoji": "🛒",
        }
        for poi_id, poi_name, chain, icon in expected_fields
    ]

    transformed = transform_grocery_retail_points(source_frame)
    observed_rows = transformed.select(
        "id", "name", "category", "icon_category", "group", "emoji"
    ).to_dicts()

    assert observed_rows == expected_rows
def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check that ``icon_category`` follows the fascia while ``category`` is the chain.

    Four rows whose fascia differs from the retailer name (for example
    "Tesco Express Esso" and "Little Waitrose Shell") are transformed; the
    expected ``icon_category`` values are "Tesco Express",
    "The Food Warehouse", "Little Waitrose" and "Morrisons Daily".
    """
    fascias = [
        "Tesco Express Esso",
        "The Food Warehouse",
        "Little Waitrose Shell",
        "Morrisons Daily",
    ]
    store_names = [
        "Tesco Test Express",
        "Iceland Test Food Warehouse",
        "Little Waitrose Test",
        "Morrisons Daily Test",
    ]
    source_frame = pl.DataFrame(
        {
            "id": [101, 102, 103, 104],
            "retailer": ["Tesco", "Iceland", "Waitrose", "Morrisons"],
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": [-0.141, -0.142, -0.143, -0.144],
            "lat_wgs": [51.515, 51.516, 51.517, 51.518],
        }
    )

    expected_pairs = [
        {"category": chain, "icon_category": icon}
        for chain, icon in (
            ("Tesco", "Tesco Express"),
            ("Iceland", "The Food Warehouse"),
            ("Waitrose", "Little Waitrose"),
            ("Morrisons", "Morrisons Daily"),
        )
    ]

    transformed = transform_grocery_retail_points(source_frame)
    observed_pairs = transformed.select("category", "icon_category").to_dicts()

    assert observed_pairs == expected_pairs
def test_transform_grocery_retail_points_drops_invalid_rows():
    """Check that only the valid row survives the transform.

    The second input row has an empty ``retailer`` string; the resulting
    ``category`` column is expected to contain just "Waitrose".
    """
    source_frame = pl.DataFrame(
        {
            "id": [101, 102],
            "retailer": ["Waitrose", ""],
            "fascia": ["Waitrose", "Tesco"],
            "store_name": ["Waitrose Test", "Tesco Test"],
            "long_wgs": [-0.141, -0.142],
            "lat_wgs": [51.515, 51.516],
        }
    )

    transformed = transform_grocery_retail_points(source_frame)
    surviving_categories = transformed["category"].to_list()

    assert surviving_categories == ["Waitrose"]