This commit is contained in:
Andras Schmelczer 2026-05-06 22:40:46 +01:00
parent 28323f145e
commit 94f9c0d594
76 changed files with 3238 additions and 1230 deletions

View file

@ -13,13 +13,12 @@ _AREA_COLUMNS = [
"lat",
"lon",
# Deprivation
"Income Score (rate)",
"Employment Score (rate)",
"Income Score",
"Employment Score",
"Education, Skills and Training Score",
"Health Deprivation and Disability Score",
"Living Environment Score",
"Indoors Sub-domain Score",
"Outdoors Sub-domain Score",
"Housing Conditions Score",
"Air Quality and Road Safety Score",
# Ethnicity
"% South Asian",
"% East Asian",
@ -144,7 +143,6 @@ def _build(
"Income Score (rate)",
"Employment Score (rate)",
"Health Deprivation and Disability Score",
"Living Environment Score",
"Indoors Sub-domain Score",
"Outdoors Sub-domain Score",
]
@ -319,6 +317,7 @@ def _build(
"Adult Skills Sub-domain Score",
"Children and Young People Sub-domain Score",
"Crime Score",
"Living Environment Score",
"Index of Multiple Deprivation (IMD) Score",
"Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
"Income Deprivation Affecting Children Index (IDACI) Score (rate)",
@ -335,6 +334,10 @@ def _build(
"date_of_transfer": "Date of last transaction",
"construction_age_band": "Construction year",
"is_construction_date_approximate": "Is construction date approximate",
"Income Score (rate)": "Income Score",
"Employment Score (rate)": "Employment Score",
"Indoors Sub-domain Score": "Housing Conditions Score",
"Outdoors Sub-domain Score": "Air Quality and Road Safety Score",
"pp_address": "Address per Property Register",
"epc_address": "Address per EPC",
"postcode": "Postcode",

View file

@ -17,11 +17,14 @@ def test_transform_grocery_retail_points_outputs_chain_categories():
pois = transform_grocery_retail_points(raw)
assert pois.select("id", "name", "category", "group", "emoji").to_dicts() == [
assert pois.select(
"id", "name", "category", "icon_category", "group", "emoji"
).to_dicts() == [
{
"id": "glx-101",
"name": "Waitrose Test",
"category": "Waitrose",
"icon_category": "Waitrose",
"group": "Groceries",
"emoji": "🛒",
},
@ -29,6 +32,7 @@ def test_transform_grocery_retail_points_outputs_chain_categories():
"id": "glx-102",
"name": "Sainsbury's Test",
"category": "Sainsbury's",
"icon_category": "Sainsbury's Local",
"group": "Groceries",
"emoji": "🛒",
},
@ -36,12 +40,45 @@ def test_transform_grocery_retail_points_outputs_chain_categories():
"id": "glx-103",
"name": "Co-op Test",
"category": "Co-op",
"icon_category": "Co-op",
"group": "Groceries",
"emoji": "🛒",
},
]
def test_transform_grocery_retail_points_keeps_fascia_icon_category():
    """Check that ``icon_category`` follows the fascia while ``category`` follows the retailer."""
    # One row per retailer, each with a fascia that differs from the plain
    # retailer name.
    columns = {
        "id": [101, 102, 103, 104],
        "retailer": ["Tesco", "Iceland", "Waitrose", "Morrisons"],
        "fascia": [
            "Tesco Express Esso",
            "The Food Warehouse",
            "Little Waitrose Shell",
            "Morrisons Daily",
        ],
        "store_name": [
            "Tesco Test Express",
            "Iceland Test Food Warehouse",
            "Little Waitrose Test",
            "Morrisons Daily Test",
        ],
        "long_wgs": [-0.141, -0.142, -0.143, -0.144],
        "lat_wgs": [51.515, 51.516, 51.517, 51.518],
    }
    expected = [
        {"category": "Tesco", "icon_category": "Tesco Express"},
        {"category": "Iceland", "icon_category": "The Food Warehouse"},
        {"category": "Waitrose", "icon_category": "Little Waitrose"},
        {"category": "Morrisons", "icon_category": "Morrisons Daily"},
    ]

    result = transform_grocery_retail_points(pl.DataFrame(columns))

    assert result.select("category", "icon_category").to_dicts() == expected
def test_transform_grocery_retail_points_drops_invalid_rows():
raw = pl.DataFrame(
{

View file

@ -1086,12 +1086,56 @@ GROCERY_RETAILER_DISPLAY_NAMES: dict[str, str] = {
}
# Raw fascia value -> icon category name.
# Several fascia variants share one icon (e.g. "Tesco Express Esso" uses the
# "Tesco Express" icon). Fascias missing from this table are resolved by
# normalize_grocery_icon_category via normalize_grocery_retailer instead.
GROCERY_FASCIA_ICON_NAMES: dict[str, str] = {
    "Aldi Local": "Aldi",
    "Asda Express": "Asda Express",
    "Asda Living": "Asda Living",
    "Asda PFS": "Asda PFS",
    "Cooltrader": "Heron Foods",
    "Cook": "COOK",
    "Eurospar": "Spar",
    "Eurospar PFS": "Spar",
    "Heron": "Heron Foods",
    "Little Waitrose": "Little Waitrose",
    "Little Waitrose Shell": "Little Waitrose",
    "Marks and Spencer": "M&S",
    "Marks and Spencer BP": "M&S Food",
    "Marks and Spencer Clothing": "M&S Clothing",
    "Marks and Spencer Food To Go": "M&S Food",
    "Marks and Spencer Food Outlet": "M&S Outlet",
    "Marks and Spencer Foodhall": "M&S Food",
    "Marks and Spencer Hospital": "M&S Hospital",
    "Marks and Spencer MSA": "M&S MSA",
    "Marks and Spencer Outlet": "M&S Outlet",
    "Marks and Spencer Simply Food": "M&S Food",
    "Marks and Spencer Travel SF": "M&S Food",
    "Morrisons Daily": "Morrisons Daily",
    "Morrisons Select": "Morrisons",
    "Sainsburys": "Sainsbury's",
    "Sainsburys Local": "Sainsbury's Local",
    "Spar PFS": "Spar",
    "Tesco Express": "Tesco Express",
    "Tesco Express Esso": "Tesco Express",
    "Tesco Extra": "Tesco Extra",
    "The Co-operative Food": "Co-op",
    "The Co-operative Food PFS": "Co-op",
    "The Food Warehouse": "The Food Warehouse",
    "Waitrose MSA": "Waitrose",
}
def normalize_grocery_retailer(retailer: str | None) -> str:
    """Return the display name for a raw retailer value.

    A missing retailer (``None``) becomes the empty string. A retailer with no
    entry in ``GROCERY_RETAILER_DISPLAY_NAMES`` is returned unchanged.
    """
    return (
        ""
        if retailer is None
        else GROCERY_RETAILER_DISPLAY_NAMES.get(retailer, retailer)
    )
def normalize_grocery_icon_category(fascia: str | None, retailer: str | None) -> str:
    """Return the icon category for a store, preferring its fascia.

    When ``fascia`` is ``None`` or empty, the normalized ``retailer`` is used.
    Otherwise the fascia is looked up in ``GROCERY_FASCIA_ICON_NAMES``; a
    fascia without an entry there is passed through
    ``normalize_grocery_retailer`` as if it were a retailer name.
    """
    if not fascia:
        return normalize_grocery_retailer(retailer)

    # Fallback for fascias that have no dedicated icon entry.
    fallback = normalize_grocery_retailer(fascia)
    return GROCERY_FASCIA_ICON_NAMES.get(fascia, fallback)
def transform_grocery_retail_points(
grocery_df: pl.DataFrame,
boundary_path: Path | None = None,
@ -1133,9 +1177,15 @@ def transform_grocery_retail_points(
pl.col("retailer")
.map_elements(normalize_grocery_retailer, return_dtype=pl.String)
.alias("category"),
pl.struct(["fascia", "retailer"])
.map_elements(
lambda row: normalize_grocery_icon_category(row["fascia"], row["retailer"]),
return_dtype=pl.String,
)
.alias("icon_category"),
pl.lit("Groceries").alias("group"),
pl.lit("🛒").alias("emoji"),
).select("id", "name", "category", "group", "lat", "lng", "emoji")
).select("id", "name", "category", "icon_category", "group", "lat", "lng", "emoji")
def transform(
@ -1189,6 +1239,7 @@ def transform(
lf = lf.with_columns(
pl.col("category").replace_strict(group_mapping).alias("group"),
pl.col("category").replace_strict(name_mapping).alias("category"),
pl.col("category").replace_strict(name_mapping).alias("icon_category"),
pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
)
@ -1203,6 +1254,7 @@ def transform(
naptan = naptan_df.lazy().with_columns(
pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
pl.lit("Public Transport").alias("group"),
pl.col("category").alias("icon_category"),
)
frames = [lf, naptan]