Rerun data pipelines

This commit is contained in:
Andras Schmelczer 2026-05-10 14:49:53 +01:00
parent 4c95815dc8
commit fc10381692
27 changed files with 2143 additions and 215 deletions

View file

@@ -79,6 +79,33 @@ def test_transform_grocery_retail_points_keeps_fascia_icon_category():
]
def test_transform_grocery_retail_points_accepts_base_fascias():
    """Check category/icon_category output for four base-fascia rows.

    Feeds four rows (Aldi, Asda, Booths, Whole Foods Market) through
    ``transform_grocery_retail_points`` and asserts the exact
    ``category`` / ``icon_category`` pairs that come back, in row order.
    """
    store_ids = [101, 102, 103, 104]
    retailers = ["Aldi", "Asda", "Booths", "Whole Foods Market"]
    fascias = ["Aldi", "Asda Superstore", "Booths", "Whole Foods Market"]
    store_names = [
        "Aldi Test",
        "Asda Test Superstore",
        "Booths Test",
        "Whole Foods Test",
    ]
    longitudes = [-0.141, -0.142, -0.143, -0.144]
    latitudes = [51.515, 51.516, 51.517, 51.518]

    raw = pl.DataFrame(
        {
            "id": store_ids,
            "retailer": retailers,
            "fascia": fascias,
            "store_name": store_names,
            "long_wgs": longitudes,
            "lat_wgs": latitudes,
        }
    )

    # Expected values are spelled out independently of the input columns so
    # the assertion does not silently track changes to the fixture above.
    expected_categories = ("Aldi", "Asda", "Booths", "Whole Foods Market")
    expected_icons = ("Aldi", "Asda Superstore", "Booths", "Whole Foods Market")
    expected_rows = [
        {"category": category, "icon_category": icon}
        for category, icon in zip(expected_categories, expected_icons)
    ]

    pois = transform_grocery_retail_points(raw)
    actual_rows = pois.select("category", "icon_category").to_dicts()

    assert actual_rows == expected_rows
def test_transform_grocery_retail_points_drops_invalid_rows():
raw = pl.DataFrame(
{