idgf
This commit is contained in:
parent
fbfebc651c
commit
aab85fe32e
33 changed files with 2016 additions and 283 deletions
|
|
@@ -6,6 +6,10 @@ import polars as pl
|
|||
from pipeline.utils.england_geometry import in_england_mask
|
||||
|
||||
DROP_CATEGORIES = {
|
||||
# GEOLYTIX Grocery Retail Points is the authoritative supermarket source
|
||||
# (transform_grocery_retail_points), so drop OSM supermarkets to avoid
|
||||
# double-counting each store as both a GEOLYTIX brand and an OSM "Supermarket".
|
||||
"shop/supermarket",
|
||||
# Street furniture & infrastructure
|
||||
"amenity/advice",
|
||||
"amenity/atm",
|
||||
|
|
@@ -364,14 +368,6 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"leisure/yes",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Groceries",
|
||||
"Supermarket",
|
||||
"🛒",
|
||||
[
|
||||
"shop/supermarket",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Groceries",
|
||||
"Convenience Store",
|
||||
|
|
@@ -1534,6 +1530,14 @@ def transform(
|
|||
pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
|
||||
)
|
||||
|
||||
# A single OSM object can carry several tag keys that map to the same
|
||||
# friendly category (e.g. amenity/pharmacy + shop/chemist -> "Pharmacy"),
|
||||
# which pois.py emits as multiple raw rows sharing one id. Collapse those
|
||||
# duplicates so they don't inflate downstream proximity counts; rows sharing
|
||||
# an id with DIFFERENT categories are preserved. Other sources are
|
||||
# pre-deduplicated.
|
||||
lf = lf.unique(subset=["id", "category"], keep="first", maintain_order=True)
|
||||
|
||||
naptan_df = pl.scan_parquet(naptan_path).collect()
|
||||
mask = in_england_mask(
|
||||
boundary_path,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue