idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -12,11 +12,19 @@ from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_
|
|||
|
||||
# POI category groups for proximity counting (2km radius).
|
||||
# Names must match the friendly names produced by transform_poi.py / naptan.py.
|
||||
# "groceries" is filled in dynamically by _groceries_categories() because the
|
||||
# GEOLYTIX dataset stores the brand (e.g. "Tesco", "Aldi") in `category` rather
|
||||
# than the literal "Supermarket"; counting only the OSM strings here severely
|
||||
# understates the metric. The static "groceries" list below is only the
# OSM-string default; main() overrides it with _groceries_categories(pois).
|
||||
POI_GROUPS_2KM = {
|
||||
"restaurants": ["Restaurant", "Fast Food"],
|
||||
"groceries": ["Greengrocer", "Supermarket", "Convenience Store"],
|
||||
}
|
||||
|
||||
# POI group whose members are counted for the static "groceries" 2km metric.
|
||||
# Covers both the OSM grocery categories (Supermarket, Convenience Store,
|
||||
# Greengrocer, ...) and the GEOLYTIX brand categories (Tesco, Aldi, ...).
|
||||
GROCERIES_GROUP = "Groceries"
|
||||
|
||||
# OS Open Greenspace function types used for park counts and distance calculation.
|
||||
# Uses the authoritative OS dataset instead of OSM point POIs for better coverage
|
||||
# of green spaces that are only mapped as polygons in OSM.
|
||||
|
|
@ -41,6 +49,26 @@ def _poi_category_slug(category: str) -> str:
|
|||
return slug or "poi"
|
||||
|
||||
|
||||
def _groceries_categories(pois: pl.DataFrame) -> list[str]:
    """List every distinct `category` belonging to the Groceries group.

    `count_pois_per_postcode` matches POIs on `category`, but the GEOLYTIX
    grocery dataset stores the brand name there (e.g. "Tesco", "Aldi") under
    group "Groceries" and never emits the literal "Supermarket". Gathering
    every category in that group therefore covers both the OSM strings and
    the brand names.

    Args:
        pois: POI dataframe; must carry `group` and `category` columns.

    Returns:
        The distinct category values found in the Groceries group, sorted.

    Raises:
        ValueError: If `pois` has no `group` column.
    """
    if "group" not in pois.columns:
        raise ValueError("POI dataframe must include a 'group' column")

    # Keep only the rows tagged with the Groceries group, then reduce them to
    # the set of category labels they use.
    in_groceries = pl.col("group") == GROCERIES_GROUP
    distinct_categories = pois.filter(in_groceries).select("category").unique()

    # Sort so the resulting list is deterministic from run to run.
    return distinct_categories.sort("category").to_series().to_list()
|
||||
|
||||
|
||||
def _build_poi_category_groups(
|
||||
pois: pl.DataFrame,
|
||||
) -> tuple[dict[str, list[str]], dict[str, str]]:
|
||||
|
|
@ -122,9 +150,15 @@ def main():
|
|||
pois = pl.read_parquet(args.pois)
|
||||
poi_category_groups, poi_display_names = _build_poi_category_groups(pois)
|
||||
|
||||
# Count static amenity groups within 2km.
|
||||
# Count static amenity groups within 2km. "groceries" is matched against
|
||||
# every Groceries category (OSM strings + GEOLYTIX brand names) so that
|
||||
# postcodes ringed by GEOLYTIX-only chains (Tesco, Aldi, ...) are counted.
|
||||
groups_2km = {
|
||||
**POI_GROUPS_2KM,
|
||||
"groceries": _groceries_categories(pois),
|
||||
}
|
||||
counts_2km = count_pois_per_postcode(
|
||||
postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2
|
||||
postcodes, pois, groups=groups_2km, radius_km=2
|
||||
)
|
||||
|
||||
# Dynamic amenity filters: nearest distance plus counts within 2km and 5km for
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue