"""Transform raw POIs into a filtered POI dataset with friendly category names."""
import argparse
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
|
|
from pipeline.utils.england_geometry import in_england_mask
|
|
|
|
|
|
# Raw "key/value" categories that are filtered out of the output entirely.
# Membership is all that matters; the grouping below is purely for readers.
DROP_CATEGORIES = {
    # Street furniture & infrastructure
    "amenity/advice", "amenity/atm", "amenity/bbq", "amenity/bench",
    "amenity/bicycle_parking", "amenity/binoculars", "amenity/boot_scraper",
    "amenity/check_in", "amenity/clock", "amenity/compressed_air",
    "amenity/donation_box", "amenity/dressing_room", "amenity/drinking_water",
    "amenity/feeding_place", "amenity/fixme", "amenity/grit_bin",
    "amenity/hunting_stand", "amenity/letter_box", "amenity/loading_dock",
    "amenity/lounge", "amenity/lounger", "amenity/motorcycle_parking",
    "amenity/mounting_block", "amenity/notice_board", "amenity/parcel_locker",
    "amenity/parking", "amenity/parking_entrance", "amenity/parking_space",
    "amenity/payment_terminal", "amenity/photo_booth", "amenity/piano",
    "amenity/post_box", "amenity/public_bookcase", "amenity/reception_desk",
    "amenity/sanitary_dump_station", "amenity/shelter", "amenity/shower",
    "amenity/smoking_area", "amenity/table", "amenity/telephone",
    "amenity/telescope", "amenity/ticket_validator", "amenity/toilets",
    "amenity/trolley_bay", "amenity/vacuum_cleaner", "amenity/vending_machine",
    "amenity/washing_machine", "amenity/washingline", "amenity/waste_basket",
    "amenity/waste_disposal", "amenity/waste_transfer_station",
    "amenity/water_point", "amenity/watering_place", "amenity/weighbridge",
    # Niche amenities and shops not useful for home buyers
    "amenity/animal_boarding", "amenity/animal_breeding", "amenity/animal_shelter",
    "amenity/boat_storage", "amenity/bureau_de_change", "amenity/bus_station",
    "amenity/conference_centre", "amenity/crematorium", "amenity/driving_school",
    "amenity/escooter_rental", "amenity/ferry_terminal", "amenity/grave_yard",
    "amenity/hall", "amenity/kick-scooter_rental", "amenity/money_transfer",
    "amenity/post_depot", "amenity/public_building", "amenity/recycling",
    "amenity/scout_hut", "amenity/social_facility", "amenity/studio",
    "amenity/taxi", "amenity/training", "amenity/vehicle_inspection",
    "shop/funeral_directors", "shop/taxi",
    # Buildings (except church & university which are mapped)
    "building/air_shaft", "building/apartments", "building/barn",
    "building/bunker", "building/chapel", "building/commercial",
    "building/construction", "building/detached", "building/entrance",
    "building/entry", "building/farm", "building/farm_auxiliary",
    "building/garage", "building/garages", "building/greenhouse",
    "building/house", "building/hut", "building/industrial",
    "building/kiosk", "building/no", "building/office",
    "building/public", "building/residential", "building/retail",
    "building/roof", "building/ruins", "building/school",
    "building/semidetached_house", "building/service", "building/shed",
    "building/terrace", "building/warehouse", "building/yes",
    # Emergency equipment and access points
    "emergency/access_point", "emergency/assembly_point",
    "emergency/bleed_control_kit", "emergency/defibrillator",
    "emergency/designated", "emergency/dry_riser_inlet",
    "emergency/emergency_ward_entrance", "emergency/fire_alarm_box",
    "emergency/fire_extinguisher", "emergency/fire_hydrant",
    "emergency/fire_service_inlet", "emergency/first_aid_kit",
    "emergency/life_ring", "emergency/lifeguard", "emergency/no",
    "emergency/phone", "emergency/rescue_equipment", "emergency/siren",
    "emergency/throw_bag", "emergency/water_rescue", "emergency/yes",
    # Tourism
    "tourism/apartment", "tourism/apartments", "tourism/camp_pitch",
    "tourism/caravan_site", "tourism/information", "tourism/picnic_site",
    "tourism/preserved_railway", "tourism/viewpoint", "tourism/village_sign",
    "tourism/yes",
    # Public transport (from NaPTAN instead)
    "public_transport/entrance", "public_transport/platform",
    "public_transport/station", "public_transport/stop_position",
}
|
|
|
|
|
|
# Each output category is defined exactly once as
# (group, friendly_name, emoji, [osm_keys...]).
# The flat CATEGORY_MAP lookup dict is derived from this list right after it.
_CATEGORIES: list[tuple[str, str, str, list[str]]] = [
    # ── Leisure ──────────────────────────────────────────────
    ("Leisure", "Café", "☕", [
        "amenity/cafe", "amenity/ice_cream", "amenity/internet_cafe",
    ]),
    ("Leisure", "Restaurant", "🍽️", [
        "amenity/restaurant", "amenity/food_court",
    ]),
    ("Leisure", "Pub", "🍺", [
        "amenity/pub", "amenity/social_club", "amenity/club", "leisure/social_club",
        "craft/brewery", "craft/distillery", "craft/winery",
    ]),
    ("Leisure", "Bar", "🍸", [
        "amenity/bar", "amenity/hookah_lounge",
    ]),
    ("Leisure", "Fast Food", "🍔", [
        "amenity/fast_food",
    ]),
    ("Leisure", "Nightclub", "🪩", [
        "amenity/nightclub", "amenity/stripclub", "amenity/casino", "amenity/gambling",
    ]),
    ("Leisure", "Cinema", "🎬", [
        "amenity/cinema",
    ]),
    ("Leisure", "Theatre", "🎭", [
        "amenity/theatre",
    ]),
    ("Leisure", "Live Music & Events", "🎶", [
        "amenity/music_venue", "amenity/events_venue", "leisure/dance",
    ]),
    ("Leisure", "Park", "🌳", [
        "leisure/park", "leisure/garden", "leisure/common", "leisure/nature_reserve",
        "leisure/dog_park", "leisure/bandstand", "leisure/bird_hide", "leisure/firepit",
        "leisure/outdoor_seating", "leisure/picnic_table", "leisure/wildlife_hide",
    ]),
    ("Leisure", "Playground", "🛝", [
        "leisure/playground", "leisure/indoor_play",
    ]),
    ("Leisure", "Sports Centre", "🏟️", [
        "leisure/sports_centre", "leisure/sports_hall", "leisure/pitch", "leisure/track",
        "leisure/golf_course", "leisure/miniature_golf", "leisure/horse_riding",
        "leisure/fishing", "leisure/swimming_pool", "leisure/water_park",
        "leisure/bathing_place",
    ]),
    ("Leisure", "Entertainment", "🎳", [
        "leisure/bowling_alley", "leisure/amusement_arcade", "leisure/adult_gaming_centre",
        "leisure/escape_game", "leisure/trampoline_park", "leisure/sauna",
        "leisure/tanning_salon", "tourism/theme_park", "amenity/bicycle_rental",
        "amenity/boat_rental", "leisure/marina", "leisure/slipway",
        "leisure/hackerspace", "leisure/yes",
    ]),
    # ── Groceries ────────────────────────────────────────────
    ("Groceries", "Supermarket", "🛒", [
        "shop/supermarket",
    ]),
    ("Groceries", "Convenience Store", "🏪", [
        "shop/convenience", "shop/general", "shop/kiosk", "shop/grocery",
    ]),
    ("Groceries", "Bakery", "🥐", [
        "shop/bakery", "shop/pastry", "craft/confectionery",
    ]),
    ("Groceries", "Butcher & Fishmonger", "🥩", [
        "shop/butcher", "shop/seafood",
    ]),
    ("Groceries", "Greengrocer", "🥬", [
        "shop/greengrocer", "shop/farm", "amenity/marketplace",
    ]),
    ("Groceries", "Off-Licence", "🍷", [
        "shop/alcohol", "shop/wine", "shop/beverages",
    ]),
    ("Groceries", "Deli & Specialty", "🧆", [
        "shop/deli", "shop/cheese", "shop/chocolate", "shop/coffee",
        "shop/confectionery", "shop/dairy", "shop/food", "shop/frozen_food",
        "shop/health_food", "shop/ice_cream", "shop/nutrition_supplements", "shop/tea",
    ]),
    # ── Shops ────────────────────────────────────────────────
    ("Shops", "Fashion & Clothing", "👕", [
        "shop/clothes", "shop/boutique", "shop/shoes", "shop/accessories", "shop/bag",
        "shop/fashion_accessories", "shop/jewelry", "shop/leather", "shop/watches",
    ]),
    ("Shops", "Electronics", "📱", [
        "shop/electronics", "shop/mobile_phone", "shop/mobile_phone_accessories",
        "shop/computer", "shop/appliance", "shop/electrical", "shop/hifi",
        "shop/video_games", "shop/games",
    ]),
    ("Shops", "Charity Shop", "❤️", [
        "shop/charity", "shop/second_hand",
    ]),
    ("Shops", "DIY & Hardware", "🔨", [
        "shop/doityourself", "shop/hardware", "shop/paint",
    ]),
    ("Shops", "Home & Garden", "🪑", [
        "shop/furniture", "shop/garden_centre", "shop/kitchen", "shop/bathroom",
        "shop/bathroom_furnishing", "shop/bed", "shop/carpet", "shop/curtain",
        "shop/flooring", "shop/fireplace", "shop/household", "shop/household_linen",
        "shop/houseware", "shop/interior_decoration", "shop/lighting",
        "shop/window_blind",
    ]),
    ("Shops", "Bookshop", "📚", [
        "shop/books", "shop/stationery",
    ]),
    ("Shops", "Pet Shop", "🐾", [
        "shop/pet",
    ]),
    ("Shops", "Sports & Outdoor", "🏕️", [
        "shop/sports", "shop/outdoor", "shop/bicycle",
    ]),
    ("Shops", "Newsagent", "📰", [
        "shop/newsagent", "shop/tobacco",
    ]),
    ("Shops", "Department Store", "🏬", [
        "shop/department_store", "shop/mall", "shop/variety_store", "shop/discount",
    ]),
    ("Shops", "Gift & Hobby", "🎁", [
        "shop/gift", "shop/florist", "shop/toys", "shop/craft", "shop/candles",
        "shop/party", "shop/art", "shop/music", "shop/musical_instrument",
        "shop/antiques", "shop/baby_goods", "shop/fabric", "shop/haberdashery",
        "shop/wool", "shop/pottery",
    ]),
    ("Shops", "Specialist Shop", "🏪", [
        "shop/agrarian", "shop/boat", "shop/bookmaker", "shop/building_materials",
        "shop/camera", "shop/car", "shop/caravan", "shop/catalogue",
        "shop/collector", "shop/copyshop", "shop/country_store", "shop/doors",
        "shop/e-cigarette", "shop/erotic", "shop/esoteric", "shop/fan",
        "shop/fishing", "shop/frame", "shop/fuel", "shop/gas",
        "shop/hairdresser_supply", "shop/military_surplus", "shop/model",
        "shop/money_lender", "shop/motorcycle", "shop/outpost", "shop/pawnbroker",
        "shop/photo", "shop/plant_hire", "shop/printer_ink", "shop/printing",
        "shop/psychic", "shop/pyrotechnics", "shop/religion", "shop/rental",
        "shop/scuba_diving", "shop/security", "shop/sewing", "shop/storage_rental",
        "shop/swimming_pool", "shop/telecommunication", "shop/ticket", "shop/tiles",
        "shop/tool_hire", "shop/trade", "shop/trophy", "shop/vacant",
        "shop/video", "shop/water_sports", "shop/weapons", "shop/wedding",
        "shop/wholesale", "shop/wigs", "shop/yes",
    ]),
    # ── Services ─────────────────────────────────────────────
    ("Services", "Hairdresser & Beauty", "💇", [
        "shop/hairdresser", "shop/beauty", "shop/cosmetics", "shop/massage",
        "shop/perfumery",
    ]),
    ("Services", "Gym & Fitness", "🏋️", [
        "leisure/fitness_centre", "leisure/fitness_station", "amenity/dojo",
        "amenity/dancing_school",
    ]),
    ("Services", "Dry Cleaner & Laundry", "👔", [
        "shop/dry_cleaning", "shop/laundry", "shop/tailor", "shop/shoe_repair",
        "shop/repair", "craft/cleaning", "craft/dressmaker", "craft/shoemaker",
        "craft/tailor",
    ]),
    ("Services", "Car Services", "🔧", [
        "shop/car_repair", "shop/car;car_repair", "shop/car_parts",
        "shop/motorcycle_repair", "shop/tyres", "amenity/car_wash",
        "amenity/car_rental", "amenity/car_sharing", "amenity/bicycle_repair_station",
    ]),
    ("Services", "Post Office", "🏤", [
        "amenity/post_office",
    ]),
    ("Services", "Vet & Pet Care", "🐕", [
        "amenity/veterinary", "shop/pet_grooming",
    ]),
    ("Services", "Bank", "🏦", [
        "amenity/bank",
    ]),
    ("Services", "Travel Agent", "✈️", [
        "shop/travel_agency", "office/travel_agent",
    ]),
    ("Services", "Other", "🛎️", [
        "shop/tattoo", "shop/piercing", "shop/locksmith", "craft/key_cutter",
    ]),
    # ── Emergency Services ───────────────────────────────────
    ("Emergency Services", "Police", "👮", ["amenity/police"]),
    ("Emergency Services", "Fire Station", "🚒", ["amenity/fire_station"]),
    ("Emergency Services", "Ambulance Station", "🚑", ["emergency/ambulance_station"]),
    # ── Health ───────────────────────────────────────────────
    ("Health", "GP Surgery", "👨⚕️", [
        "amenity/doctors", "healthcare/doctor",
    ]),
    ("Health", "Dentist", "🦷", [
        "amenity/dentist", "healthcare/dentist",
    ]),
    ("Health", "Pharmacy", "💊", [
        "amenity/pharmacy", "healthcare/pharmacy", "shop/chemist", "shop/herbalist",
        "shop/health", "healthcare/alternative",
    ]),
    ("Health", "Hospital & Clinic", "🏥", [
        "amenity/hospital", "amenity/clinic", "healthcare/hospital", "healthcare/centre",
        "healthcare/clinic", "office/healthcare", "healthcare/laboratory",
        "healthcare/rehabilitation", "healthcare/vaccination_centre", "healthcare/yes",
    ]),
    ("Health", "Optician", "👓", [
        "shop/optician", "healthcare/optometrist", "shop/hearing_aids",
        "healthcare/audiologist",
    ]),
    ("Health", "Physiotherapy", "🏃", [
        "healthcare/physiotherapist", "healthcare/podiatrist",
    ]),
    ("Health", "Counselling & Therapy", "🧠", [
        "healthcare/counselling", "healthcare/psychotherapist", "office/therapist",
    ]),
    ("Health", "Care Home", "🏠", [
        "amenity/care_home", "amenity/nursing_home", "office/home_care",
    ]),
    ("Health", "Medical & Mobility", "♿", [
        "shop/medical_supply", "shop/mobility", "shop/mobility_scooter",
    ]),
    # ── Culture ──────────────────────────────────────────────
    ("Culture", "Museum", "🏛️", [
        "tourism/museum",
    ]),
    ("Culture", "Gallery", "🖼️", [
        "tourism/gallery", "tourism/artwork",
    ]),
    ("Culture", "Library", "📚", [
        "amenity/library",
    ]),
    ("Culture", "Place of Worship", "⛪", [
        "amenity/place_of_worship", "building/church",
    ]),
    ("Culture", "Arts Centre", "🎨", [
        "amenity/arts_centre",
    ]),
    ("Culture", "Zoo", "🦁", [
        "tourism/zoo",
    ]),
    ("Culture", "Tourist Attraction", "📸", [
        "tourism/attraction", "amenity/fountain", "amenity/courthouse", "tourism/chalet",
    ]),
    # ── Education ────────────────────────────────────────────
    ("Education", "School", "🏫", [
        "amenity/school", "amenity/prep_school", "amenity/language_school",
        "amenity/music_school", "amenity/university", "amenity/college",
        "building/university", "amenity/kindergarten", "amenity/childcare",
    ]),
    # ── Local Businesses ─────────────────────────────────────
    ("Local Businesses", "Hotel", "🏨", [
        "tourism/hotel", "tourism/hostel", "tourism/guest_house", "tourism/motel",
        "tourism/camp_site",
    ]),
    ("Local Businesses", "Local Business", "🛠️", [
        # Tradespeople
        "craft/builder", "craft/carpenter", "craft/electrician",
        "craft/electronics_repair", "craft/floorer", "craft/gardener",
        "craft/glaziery", "craft/hvac", "craft/joiner", "craft/locksmith",
        "craft/painter", "craft/plumber", "craft/roofer", "craft/window_construction",
        "craft/agricultural_engines", "craft/atelier", "craft/blacksmith",
        "craft/bookbinder", "craft/caterer", "craft/handicraft", "craft/jeweller",
        "craft/metal_construction", "craft/photographer",
        "craft/photographic_laboratory", "craft/pottery", "craft/printer",
        "craft/sawmill", "craft/scaffolder", "craft/sculptor", "craft/signmaker",
        "craft/stonemason", "craft/upholsterer", "craft/watchmaker", "craft/yes",
        "shop/glaziery", "shop/windows",
        # Professional offices & estate agents
        "shop/estate_agent", "office/accountant", "office/architect",
        "office/construction_company", "office/engineer", "office/estate_agent",
        "office/financial", "office/financial_advisor", "office/insurance",
        "office/lawyer", "office/mortgage", "office/property_management",
        "office/solicitor", "office/surveyor", "office/tax_advisor",
    ]),
    ("Local Businesses", "Offices", "🏢", [
        "amenity/coworking_space", "office/advertising_agency", "office/association",
        "office/charity", "office/company", "office/consulting", "office/courier",
        "office/coworking", "office/design", "office/diplomatic",
        "office/educational_institution", "office/employment_agency",
        "office/energy_supplier", "office/foundation", "office/government",
        "office/graphic_design", "office/interior_design", "office/it",
        "office/logistics", "office/marketing", "office/moving_company",
        "office/newspaper", "office/ngo", "office/notary", "office/political_party",
        "office/politician", "office/recruitment", "office/religion",
        "office/research", "office/security", "office/taxi",
        "office/telecommunication", "office/union", "office/university",
        "office/vacant", "office/web_design", "office/yes",
    ]),
    # ── Other ────────────────────────────────────────────────
    ("Other", "EV Charging", "🔌", [
        "amenity/charging_station",
    ]),
    ("Other", "Fuel Station", "⛽", [
        "amenity/fuel",
    ]),
    ("Other", "Community Centre", "🤝", [
        "amenity/community_centre", "amenity/social_centre", "amenity/townhall",
    ]),
]
|
|
|
|
def _build_category_map(
    categories: list[tuple[str, str, str, list[str]]],
) -> dict[str, tuple[str, str, str]]:
    """Flatten category definitions into an OSM key → (group, name, emoji) lookup.

    Args:
        categories: Entries shaped ``(group, friendly_name, emoji, [osm_keys...])``.

    Returns:
        A dict with one entry per OSM key, in definition order.

    Raises:
        ValueError: If the same OSM key is listed under two different output
            categories. A plain dict comprehension would silently keep only
            the last one, hiding the mistake.
    """
    lookup: dict[str, tuple[str, str, str]] = {}
    for group, name, emoji, osm_keys in categories:
        target = (group, name, emoji)
        for osm_key in osm_keys:
            # setdefault returns the earlier mapping when the key was already seen.
            existing = lookup.setdefault(osm_key, target)
            if existing != target:
                raise ValueError(
                    f"OSM key {osm_key!r} is mapped to both "
                    f"{existing[1]!r} and {name!r}"
                )
    return lookup


# Build flat lookup: OSM category → (group, friendly_name, emoji)
CATEGORY_MAP: dict[str, tuple[str, str, str]] = _build_category_map(_CATEGORIES)
|
|
|
|
|
|
# Emoji shown for each NaPTAN category value; every category present in the
# NaPTAN input must have an entry here because the lookup is strict.
NAPTAN_EMOJIS: dict[str, str] = {
    # Air and water
    "Airport": "✈️",
    "Ferry": "⛴️",
    # Rail
    "Rail station": "🚆",
    # Road
    "Bus stop": "🚏",
    "Bus station": "🚌",
    "Taxi rank": "🚕",
    # Underground
    "Tube station": "🚇",
}
|
|
|
|
|
|
# Retailer names that are all displayed under the single "Co-op" brand.
COOP_RETAILERS = {
    # Regional societies
    "Allendale Co-operative Society",
    "Channel Islands Co-operative Society",
    "Chelmsford Star Co-operative Society",
    "Coniston Co-operative Society",
    "Langdale Co-operative Society",
    "Tamworth Co-operative Society",
    "The Radstock Co-operative Society",
    # Regional co-operatives
    "Central England Co-operative",
    "Clydebank Co-operative",
    "East of England Co-operative",
    "Heart of England Co-operative",
    "Lincolnshire Co-operative",
    "Midcounties Co-operative",
    "Scottish Midland Co-operative",
    "The Southern Co-operative",
    # National group
    "The Co-operative Group",
}
|
|
|
|
# Retailer name → display name. Retailers without an entry keep their own name
# (see normalize_grocery_retailer).
GROCERY_RETAILER_DISPLAY_NAMES: dict[str, str] = {
    "Cook": "COOK",
    "Heron": "Heron Foods",
    "Marks and Spencer": "M&S",
    "Sainsburys": "Sainsbury's",
    # Every co-operative society collapses to the one "Co-op" brand.
    **dict.fromkeys(COOP_RETAILERS, "Co-op"),
}
|
|
|
|
|
|
# Store fascia → icon category name. Fascias without an entry fall back to the
# retailer display-name lookup (see normalize_grocery_icon_category).
GROCERY_FASCIA_ICON_NAMES: dict[str, str] = {
    # Aldi / Asda
    "Aldi Local": "Aldi",
    "Asda Express": "Asda Express",
    "Asda Living": "Asda Living",
    "Asda PFS": "Asda PFS",
    # Heron / Co-op / COOK / Spar
    "Cooltrader": "Heron Foods",
    "Co-op Food": "Co-op",
    "Cook": "COOK",
    "Eurospar": "Spar",
    "Eurospar PFS": "Spar",
    "Heron": "Heron Foods",
    # Waitrose
    "Little Waitrose": "Little Waitrose",
    "Little Waitrose Shell": "Little Waitrose",
    # Marks and Spencer
    "Marks and Spencer": "M&S",
    "Marks and Spencer BP": "M&S Food",
    "Marks and Spencer Clothing": "M&S Clothing",
    "Marks and Spencer Food To Go": "M&S Food",
    "Marks and Spencer Food Outlet": "M&S Outlet",
    "Marks and Spencer Foodhall": "M&S Food",
    "Marks and Spencer Hospital": "M&S Hospital",
    "Marks and Spencer MSA": "M&S MSA",
    "Marks and Spencer Outlet": "M&S Outlet",
    "Marks and Spencer Simply Food": "M&S Food",
    "Marks and Spencer Travel SF": "M&S Food",
    # Morrisons / Sainsbury's / Spar
    "Morrisons Daily": "Morrisons Daily",
    "Morrisons Select": "Morrisons",
    "Sainsburys": "Sainsbury's",
    "Sainsburys Local": "Sainsbury's Local",
    "Spar PFS": "Spar",
    # Tesco
    "Tesco Express": "Tesco Express",
    "Tesco Express Esso": "Tesco Express",
    "Tesco Extra": "Tesco Extra",
    # Remaining fascias
    "The Co-operative Food": "Co-op",
    "The Co-operative Food PFS": "Co-op",
    "The Food Warehouse": "The Food Warehouse",
    "Waitrose MSA": "Waitrose",
}
|
|
|
|
|
|
def normalize_grocery_retailer(retailer: str | None) -> str:
    """Return the display name for a grocery retailer.

    ``None`` becomes an empty string; a retailer without an entry in
    ``GROCERY_RETAILER_DISPLAY_NAMES`` is returned unchanged.
    """
    return (
        ""
        if retailer is None
        else GROCERY_RETAILER_DISPLAY_NAMES.get(retailer, retailer)
    )
|
|
|
|
|
|
def normalize_grocery_icon_category(fascia: str | None, retailer: str | None) -> str:
    """Return the icon category for a grocery store.

    A non-empty fascia wins: it is looked up in ``GROCERY_FASCIA_ICON_NAMES``
    and, failing that, normalised like a retailer name. Without a fascia the
    retailer's display name is used.
    """
    if not fascia:
        return normalize_grocery_retailer(retailer)
    if fascia in GROCERY_FASCIA_ICON_NAMES:
        return GROCERY_FASCIA_ICON_NAMES[fascia]
    return normalize_grocery_retailer(fascia)
|
|
|
|
|
|
def transform_grocery_retail_points(
    grocery_df: pl.DataFrame,
    boundary_path: Path | None = None,
) -> pl.DataFrame:
    """Convert GEOLYTIX Grocery Retail Points into the POI parquet schema.

    Args:
        grocery_df: Frame that must contain the columns ``id``, ``retailer``,
            ``fascia``, ``store_name``, ``long_wgs`` and ``lat_wgs``.
        boundary_path: Optional boundary file handed to ``in_england_mask``;
            when given, rows for which the mask is false are removed.

    Returns:
        A frame with the columns ``id``, ``name``, ``category``,
        ``icon_category``, ``group``, ``lat``, ``lng`` and ``emoji``.

    Raises:
        ValueError: If any required column is missing.
    """
    expected_columns = {"id", "retailer", "fascia", "store_name", "long_wgs", "lat_wgs"}
    absent = expected_columns.difference(grocery_df.columns)
    if absent:
        raise ValueError(f"GEOLYTIX retail points missing columns: {sorted(absent)}")

    text_columns = [
        pl.col(column).cast(pl.String)
        for column in ("id", "retailer", "fascia", "store_name")
    ]
    points = grocery_df.select(
        *text_columns,
        pl.col("lat_wgs").cast(pl.Float64).alias("lat"),
        pl.col("long_wgs").cast(pl.Float64).alias("lng"),
    )
    # Rows without an id, retailer or coordinates cannot become POIs.
    points = points.drop_nulls(["id", "retailer", "lat", "lng"])
    points = points.filter(pl.col("retailer").str.len_chars() > 0)

    if boundary_path is not None and len(points) > 0:
        inside = in_england_mask(
            boundary_path,
            points["lat"].to_numpy(),
            points["lng"].to_numpy(),
        )
        points = points.filter(pl.Series(inside))

    def _icon_for(row) -> str:
        """Pick the icon category from one {fascia, retailer} struct value."""
        return normalize_grocery_icon_category(row["fascia"], row["retailer"])

    # Prefix ids so they are distinguishable from ids of other sources.
    poi_id = pl.concat_str([pl.lit("glx-"), pl.col("id")]).alias("id")
    # First non-null of store name, fascia, retailer; doubled quotes are collapsed.
    poi_name = (
        pl.coalesce(["store_name", "fascia", "retailer"])
        .str.replace_all("''", "'")
        .alias("name")
    )
    poi_category = (
        pl.col("retailer")
        .map_elements(normalize_grocery_retailer, return_dtype=pl.String)
        .alias("category")
    )
    poi_icon = (
        pl.struct(["fascia", "retailer"])
        .map_elements(_icon_for, return_dtype=pl.String)
        .alias("icon_category")
    )

    enriched = points.with_columns(
        poi_id,
        poi_name,
        poi_category,
        poi_icon,
        pl.lit("Groceries").alias("group"),
        pl.lit("🛒").alias("emoji"),
    )
    return enriched.select(
        "id", "name", "category", "icon_category", "group", "lat", "lng", "emoji"
    )
|
|
|
|
|
|
def transform(
    input_path: Path,
    naptan_path: Path | None = None,
    boundary_path: Path | None = None,
    grocery_retail_points_path: Path | None = None,
) -> pl.LazyFrame:
    """Filter raw POIs, map them to friendly categories and append extra sources.

    Args:
        input_path: Parquet file of raw POIs with a ``category`` column.
        naptan_path: Optional NaPTAN parquet file. When ``None`` no public
            transport rows are appended.
        boundary_path: Optional boundary file handed to ``in_england_mask``;
            when given, NaPTAN and grocery rows outside the mask are removed.
        grocery_retail_points_path: Optional GEOLYTIX Grocery Retail Points
            parquet file, converted by ``transform_grocery_retail_points``.

    Returns:
        A lazy frame concatenating the mapped POIs with the optional sources.

    Raises:
        ValueError: If the data contains a category that is neither dropped
            nor mapped, or if a mapping has an empty friendly name or emoji.
    """
    lf = pl.scan_parquet(input_path)

    # Get all unique categories present in the data
    present = set(
        lf.select("category").unique().collect(engine="streaming").to_series().to_list()
    )

    # Verify every non-dropped category has a mapping
    unmapped = present.difference(DROP_CATEGORIES, CATEGORY_MAP)
    if unmapped:
        # key=str keeps the sort from failing if a null category sits among strings
        raise ValueError(
            f"Categories missing from CATEGORY_MAP: {sorted(unmapped, key=str)}"
        )

    # Warn about CATEGORY_MAP keys not in data (may be absent in regional extracts)
    mapped_but_absent = [cat for cat in CATEGORY_MAP if cat not in present]
    if mapped_but_absent:
        print(
            f"CATEGORY_MAP categories not in data (skipped): {sorted(mapped_but_absent)}"
        )

    # Drop unwanted categories
    lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))

    # Check no friendly names or emojis are empty (defensive)
    missing_names = [k for k, v in CATEGORY_MAP.items() if not v[1]]
    if missing_names:
        raise ValueError(f"Empty friendly names for: {missing_names}")
    missing_emojis = [k for k, v in CATEGORY_MAP.items() if not v[2]]
    if missing_emojis:
        raise ValueError(f"Empty emojis for: {missing_emojis}")

    # Build lookup expressions from the 3-tuple mapping
    group_mapping = {k: v[0] for k, v in CATEGORY_MAP.items()}
    name_mapping = {k: v[1] for k, v in CATEGORY_MAP.items()}
    emoji_mapping = {k: v[2] for k, v in CATEGORY_MAP.items()}

    # All four expressions read the raw "category" column of the input frame,
    # even though one of them overwrites it with the friendly name.
    lf = lf.with_columns(
        pl.col("category").replace_strict(group_mapping).alias("group"),
        pl.col("category").replace_strict(name_mapping).alias("category"),
        pl.col("category").replace_strict(name_mapping).alias("icon_category"),
        pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
    )

    frames = [lf]

    # naptan_path defaults to None, so only read NaPTAN when a path was given.
    if naptan_path is not None:
        naptan_df = pl.scan_parquet(naptan_path).collect()
        # Same empty-frame guard as the grocery path: skip the mask when
        # there are no rows to test.
        if boundary_path is not None and len(naptan_df) > 0:
            mask = in_england_mask(
                boundary_path,
                naptan_df["lat"].to_numpy(),
                naptan_df["lng"].to_numpy(),
            )
            naptan_df = naptan_df.filter(pl.Series(mask))
        frames.append(
            naptan_df.lazy().with_columns(
                pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
                pl.lit("Public Transport").alias("group"),
                pl.col("category").alias("icon_category"),
            )
        )

    if grocery_retail_points_path is not None:
        grocery_df = pl.read_parquet(grocery_retail_points_path)
        grocery_pois = transform_grocery_retail_points(grocery_df, boundary_path)
        frames.append(grocery_pois.lazy())

    return pl.concat(frames, how="diagonal_relaxed")
|
|
|
|
|
|
def main():
    """Command-line entry point: transform the inputs and write the POI parquet.

    Parses the CLI arguments, runs ``transform``, writes the collected frame to
    ``--output`` and prints the file size, row count and per-category counts.
    """
    cli = argparse.ArgumentParser(
        description="Transform raw POIs to filtered version with friendly names"
    )
    cli.add_argument("--input", type=Path, required=True, help="Raw POIs parquet file")
    cli.add_argument(
        "--naptan", type=Path, required=True, help="NaPTAN stations parquet file"
    )
    cli.add_argument(
        "--boundary",
        type=Path,
        required=True,
        help="England boundary GeoJSON file",
    )
    cli.add_argument(
        "--grocery-retail-points",
        type=Path,
        help="GEOLYTIX Grocery Retail Points parquet",
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output filtered POIs parquet file"
    )
    opts = cli.parse_args()

    lazy_pois = transform(
        opts.input,
        opts.naptan,
        opts.boundary,
        opts.grocery_retail_points,
    )
    df = lazy_pois.collect(engine="streaming")
    df.write_parquet(opts.output)

    # Report what was written, then list categories from most to least frequent.
    size_mb = opts.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {opts.output} ({size_mb:.1f} MB, {len(df):,} POIs)")
    print(f"\nCategories ({df['category'].n_unique()}):")
    per_category = df.group_by("group", "category", "emoji").len()
    per_category = per_category.sort("len", descending=True)
    for row in per_category.iter_rows(named=True):
        print(f" [{row['group']}] {row['emoji']} {row['category']}: {row['len']:,}")


if __name__ == "__main__":
    main()
|