More
This commit is contained in:
parent
cd34ee693f
commit
05a1f316e1
58 changed files with 3113 additions and 1277 deletions
|
|
@ -1058,10 +1058,91 @@ NAPTAN_EMOJIS: dict[str, str] = {
|
|||
}
|
||||
|
||||
|
||||
# Retailer names that GROCERY_RETAILER_DISPLAY_NAMES collapses into the
# single "Co-op" display name.
COOP_RETAILERS: set[str] = {
    "Allendale Co-operative Society",
    "Central England Co-operative",
    "Channel Islands Co-operative Society",
    "Chelmsford Star Co-operative Society",
    "Clydebank Co-operative",
    "Coniston Co-operative Society",
    "East of England Co-operative",
    "Heart of England Co-operative",
    "Langdale Co-operative Society",
    "Lincolnshire Co-operative",
    "Midcounties Co-operative",
    "Scottish Midland Co-operative",
    "Tamworth Co-operative Society",
    "The Co-operative Group",
    "The Radstock Co-operative Society",
    "The Southern Co-operative",
}

# Maps a raw retailer name to the label used for it. Names without an
# entry here have no override.
GROCERY_RETAILER_DISPLAY_NAMES: dict[str, str] = {
    "Cook": "COOK",
    "Heron": "Heron Foods",
    "Marks and Spencer": "M&S",
    "Sainsburys": "Sainsbury's",
    # Every co-operative society shares one display name.
    **dict.fromkeys(COOP_RETAILERS, "Co-op"),
}
|
||||
|
||||
|
||||
def normalize_grocery_retailer(retailer: str | None) -> str:
    """Return the display name for *retailer*.

    ``None`` yields an empty string. A name with no entry in
    ``GROCERY_RETAILER_DISPLAY_NAMES`` is returned unchanged.
    """
    return (
        ""
        if retailer is None
        else GROCERY_RETAILER_DISPLAY_NAMES.get(retailer, retailer)
    )
|
||||
|
||||
|
||||
def transform_grocery_retail_points(
    grocery_df: pl.DataFrame,
    boundary_path: Path | None = None,
) -> pl.DataFrame:
    """Convert GEOLYTIX Grocery Retail Points into the POI parquet schema.

    Args:
        grocery_df: Frame containing at least the columns ``id``,
            ``retailer``, ``fascia``, ``store_name``, ``long_wgs`` and
            ``lat_wgs``.
        boundary_path: Optional path handed to ``in_england_mask``; when
            given, only rows for which that mask is true are kept.

    Returns:
        A frame with the columns ``id``, ``name``, ``category``, ``group``,
        ``lat``, ``lng`` and ``emoji``, in that order.

    Raises:
        ValueError: If any required column is missing from ``grocery_df``.
    """
    required = {"id", "retailer", "fascia", "store_name", "long_wgs", "lat_wgs"}
    missing = required - set(grocery_df.columns)
    if missing:
        raise ValueError(
            f"GEOLYTIX retail points missing columns: {sorted(missing)}"
        )

    df = (
        grocery_df.select(
            pl.col("id").cast(pl.String),
            pl.col("retailer").cast(pl.String),
            pl.col("fascia").cast(pl.String),
            pl.col("store_name").cast(pl.String),
            pl.col("lat_wgs").cast(pl.Float64).alias("lat"),
            pl.col("long_wgs").cast(pl.Float64).alias("lng"),
        )
        # Rows without an id, a retailer or coordinates are dropped.
        .drop_nulls(["id", "retailer", "lat", "lng"])
        .filter(pl.col("retailer").str.len_chars() > 0)
    )

    # Skip the boundary lookup entirely when there is nothing to test.
    if boundary_path is not None and df.height > 0:
        mask = in_england_mask(
            boundary_path,
            df["lat"].to_numpy(),
            df["lng"].to_numpy(),
        )
        df = df.filter(pl.Series(mask))

    return df.with_columns(
        # Prefix ids with "glx-".
        pl.concat_str([pl.lit("glx-"), pl.col("id")]).alias("id"),
        # First non-null of store name, fascia, retailer; doubled
        # apostrophes are collapsed to a single one.
        pl.coalesce(["store_name", "fascia", "retailer"])
        .str.replace_all("''", "'")
        .alias("name"),
        # Native mapping lookup: retailers with a display-name override are
        # replaced, all others keep their original value. This avoids a
        # per-row Python callback.
        pl.col("retailer").replace(GROCERY_RETAILER_DISPLAY_NAMES).alias("category"),
        pl.lit("Groceries").alias("group"),
        pl.lit("🛒").alias("emoji"),
    ).select("id", "name", "category", "group", "lat", "lng", "emoji")
|
||||
|
||||
|
||||
def transform(
|
||||
input_path: Path,
|
||||
naptan_path: Path | None = None,
|
||||
boundary_path: Path | None = None,
|
||||
grocery_retail_points_path: Path | None = None,
|
||||
) -> pl.LazyFrame:
|
||||
lf = pl.scan_parquet(input_path)
|
||||
|
||||
|
|
@ -1123,7 +1204,14 @@ def transform(
|
|||
pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
|
||||
pl.lit("Public Transport").alias("group"),
|
||||
)
|
||||
return pl.concat([lf, naptan], how="diagonal_relaxed")
|
||||
|
||||
frames = [lf, naptan]
|
||||
if grocery_retail_points_path is not None:
|
||||
grocery_df = pl.read_parquet(grocery_retail_points_path)
|
||||
grocery_pois = transform_grocery_retail_points(grocery_df, boundary_path)
|
||||
frames.append(grocery_pois.lazy())
|
||||
|
||||
return pl.concat(frames, how="diagonal_relaxed")
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -1142,12 +1230,22 @@ def main():
|
|||
required=True,
|
||||
help="England boundary GeoJSON file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--grocery-retail-points",
|
||||
type=Path,
|
||||
help="GEOLYTIX Grocery Retail Points parquet",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output filtered POIs parquet file"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
df = transform(args.input, args.naptan, args.boundary).collect(engine="streaming")
|
||||
df = transform(
|
||||
args.input,
|
||||
args.naptan,
|
||||
args.boundary,
|
||||
args.grocery_retail_points,
|
||||
).collect(engine="streaming")
|
||||
|
||||
df.write_parquet(args.output)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue