all is well
This commit is contained in:
parent
eac1bd0d13
commit
2f149503bb
53 changed files with 1543 additions and 354 deletions
|
|
@ -5,7 +5,6 @@ import polars as pl
|
|||
|
||||
from pipeline.utils.england_geometry import in_england_mask
|
||||
|
||||
|
||||
DROP_CATEGORIES = {
|
||||
# Street furniture & infrastructure
|
||||
"amenity/advice",
|
||||
|
|
@ -1165,49 +1164,44 @@ COOP_RETAILERS = {
|
|||
"The Southern Co-operative",
|
||||
}
|
||||
|
||||
GROCERY_RETAILER_DISPLAY_NAMES: dict[str, str] = {
|
||||
"Aldi": "Aldi",
|
||||
"Asda": "Asda",
|
||||
"Booths": "Booths",
|
||||
"Budgens": "Budgens",
|
||||
"Centra": "Centra",
|
||||
MIN_GROCERY_CHAIN_LOCATIONS = 5
|
||||
|
||||
GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES: dict[str, str] = {
|
||||
"Cook": "COOK",
|
||||
"Costco": "Costco",
|
||||
"Dunnes Stores": "Dunnes Stores",
|
||||
"Farmfoods": "Farmfoods",
|
||||
"Heron": "Heron Foods",
|
||||
"Iceland": "Iceland",
|
||||
"Lidl": "Lidl",
|
||||
"Makro": "Makro",
|
||||
"Marks and Spencer": "M&S",
|
||||
"Morrisons": "Morrisons",
|
||||
"Planet Organic": "Planet Organic",
|
||||
"Sainsburys": "Sainsbury's",
|
||||
"Spar": "Spar",
|
||||
"Tesco": "Tesco",
|
||||
"Waitrose": "Waitrose",
|
||||
"Whole Foods Market": "Whole Foods Market",
|
||||
**{retailer: "Co-op" for retailer in COOP_RETAILERS},
|
||||
"The Co-operative Group": "Co-op",
|
||||
}
|
||||
|
||||
|
||||
GROCERY_FASCIA_ICON_NAMES: dict[str, str] = {
|
||||
**GROCERY_RETAILER_DISPLAY_NAMES,
|
||||
"Aldi": "Aldi",
|
||||
"Aldi Local": "Aldi",
|
||||
"Asda": "Asda",
|
||||
"Asda Express": "Asda Express",
|
||||
"Asda Living": "Asda Living",
|
||||
"Asda PFS": "Asda PFS",
|
||||
"Asda PFS": "Asda",
|
||||
"Asda Supercentre": "Asda Supercentre",
|
||||
"Asda Supermarket": "Asda Supermarket",
|
||||
"Asda Superstore": "Asda Superstore",
|
||||
"Booths": "Booths",
|
||||
"Budgens": "Budgens",
|
||||
"Centra": "Centra",
|
||||
"Cooltrader": "Heron Foods",
|
||||
"Co-op Food": "Co-op",
|
||||
"Cook": "COOK",
|
||||
"Costco": "Costco",
|
||||
"Dunnes Stores": "Dunnes Stores",
|
||||
"Eurospar": "Spar",
|
||||
"Eurospar PFS": "Spar",
|
||||
"Farmfoods": "Farmfoods",
|
||||
"Heron": "Heron Foods",
|
||||
"Iceland": "Iceland",
|
||||
"Lidl": "Lidl",
|
||||
"Little Waitrose": "Little Waitrose",
|
||||
"Little Waitrose Shell": "Little Waitrose",
|
||||
"Makro": "Makro",
|
||||
"Marks and Spencer": "M&S",
|
||||
"Marks and Spencer BP": "M&S Food",
|
||||
"Marks and Spencer Clothing": "M&S Clothing",
|
||||
|
|
@ -1221,41 +1215,44 @@ GROCERY_FASCIA_ICON_NAMES: dict[str, str] = {
|
|||
"Marks and Spencer Travel SF": "M&S Food",
|
||||
"Morrisons Daily": "Morrisons Daily",
|
||||
"Morrisons Select": "Morrisons",
|
||||
"Planet Organic": "Planet Organic",
|
||||
"Sainsbury's Local": "Sainsbury's Local",
|
||||
"Sainsburys": "Sainsbury's",
|
||||
"Sainsburys Local": "Sainsbury's Local",
|
||||
"Spar": "Spar",
|
||||
"Spar PFS": "Spar",
|
||||
"Tesco": "Tesco",
|
||||
"Tesco Express": "Tesco Express",
|
||||
"Tesco Express Esso": "Tesco Express",
|
||||
"Tesco Extra": "Tesco Extra",
|
||||
"The Co-operative Food": "Co-op",
|
||||
"The Co-operative Food PFS": "Co-op",
|
||||
"The Food Warehouse": "The Food Warehouse",
|
||||
"Waitrose": "Waitrose",
|
||||
"Waitrose MSA": "Waitrose",
|
||||
"Whole Foods Market": "Whole Foods Market",
|
||||
}
|
||||
|
||||
|
||||
def normalize_grocery_retailer(retailer: str | None) -> str:
|
||||
if retailer is None:
|
||||
return ""
|
||||
display_name = GROCERY_RETAILER_DISPLAY_NAMES.get(retailer)
|
||||
if display_name is None:
|
||||
raise ValueError(f"Missing grocery retailer display name for {retailer!r}")
|
||||
return display_name
|
||||
retailer = retailer.strip()
|
||||
return GROCERY_RETAILER_DISPLAY_NAME_OVERRIDES.get(retailer, retailer)
|
||||
|
||||
|
||||
def normalize_grocery_icon_category(fascia: str | None, retailer: str | None) -> str:
|
||||
if fascia:
|
||||
icon_name = GROCERY_FASCIA_ICON_NAMES.get(fascia)
|
||||
if icon_name is None:
|
||||
raise ValueError(f"Missing grocery fascia icon name for {fascia!r}")
|
||||
return icon_name
|
||||
icon_name = GROCERY_FASCIA_ICON_NAMES.get(fascia.strip())
|
||||
if icon_name is not None:
|
||||
return icon_name
|
||||
return normalize_grocery_retailer(retailer)
|
||||
|
||||
|
||||
def transform_grocery_retail_points(
|
||||
grocery_df: pl.DataFrame,
|
||||
boundary_path: Path | None = None,
|
||||
min_chain_locations: int = MIN_GROCERY_CHAIN_LOCATIONS,
|
||||
) -> pl.DataFrame:
|
||||
"""Convert GEOLYTIX Grocery Retail Points into the POI parquet schema."""
|
||||
required = {"id", "retailer", "fascia", "store_name", "long_wgs", "lat_wgs"}
|
||||
|
|
@ -1272,6 +1269,11 @@ def transform_grocery_retail_points(
|
|||
pl.col("lat_wgs").cast(pl.Float64).alias("lat"),
|
||||
pl.col("long_wgs").cast(pl.Float64).alias("lng"),
|
||||
)
|
||||
.with_columns(
|
||||
pl.col("retailer").str.strip_chars(),
|
||||
pl.col("fascia").str.strip_chars(),
|
||||
pl.col("store_name").str.strip_chars(),
|
||||
)
|
||||
.drop_nulls(["id", "retailer", "lat", "lng"])
|
||||
.filter(pl.col("retailer").str.len_chars() > 0)
|
||||
)
|
||||
|
|
@ -1284,6 +1286,14 @@ def transform_grocery_retail_points(
|
|||
)
|
||||
df = df.filter(pl.Series(mask))
|
||||
|
||||
eligible_retailers = (
|
||||
df.group_by("retailer")
|
||||
.len()
|
||||
.filter(pl.col("len") >= min_chain_locations)
|
||||
.select("retailer")
|
||||
)
|
||||
df = df.join(eligible_retailers, on="retailer", how="semi")
|
||||
|
||||
return df.with_columns(
|
||||
pl.concat_str([pl.lit("glx-"), pl.col("id")]).alias("id"),
|
||||
pl.coalesce(["store_name", "fascia", "retailer"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue