# Transform raw POIs into a filtered dataset with friendly category names and emojis.
import argparse
from pathlib import Path

import polars as pl
# Raw "<key>/<value>" categories that are excluded from the output.
# transform() filters out every row whose category is in this set, and a
# category listed here does not need an entry in CATEGORY_MAP.
DROP_CATEGORIES = {
    # amenity
    "amenity/advice",
    "amenity/atm",
    "amenity/bbq",
    "amenity/bench",
    "amenity/bicycle_parking",
    "amenity/clock",
    "amenity/fixme",
    "amenity/grit_bin",
    "amenity/hunting_stand",
    "amenity/motorcycle_parking",
    "amenity/notice_board",
    "amenity/parking",
    "amenity/parking_entrance",
    "amenity/parking_space",
    "amenity/post_box",
    "amenity/telephone",
    "amenity/toilets",
    "amenity/vacuum_cleaner",
    "amenity/waste_basket",
    # building
    "building/air_shaft",
    "building/apartments",
    "building/detached",
    "building/entrance",
    "building/entry",
    "building/garage",
    "building/garages",
    "building/house",
    "building/hut",
    "building/no",
    "building/office",
    "building/public",
    "building/residential",
    "building/roof",
    "building/shed",
    "building/terrace",
    "building/yes",
    # emergency
    "emergency/access_point",
    "emergency/ambulance_station",
    "emergency/assembly_point",
    "emergency/bleed_control_kit",
    "emergency/defibrillator",
    "emergency/designated",
    "emergency/dry_riser_inlet",
    "emergency/emergency_ward_entrance",
    "emergency/fire_alarm_box",
    "emergency/fire_extinguisher",
    "emergency/fire_hydrant",
    "emergency/fire_service_inlet",
    "emergency/first_aid_kit",
    "emergency/life_ring",
    "emergency/lifeguard",
    "emergency/no",
    "emergency/phone",
    "emergency/rescue_equipment",
    "emergency/siren",
    "emergency/throw_bag",
    "emergency/water_rescue",
    "emergency/yes",
    # leisure
    "leisure/firepit",
    "leisure/fishing",
    "leisure/picnic_table",
    # office
    "office/company",
    "office/yes",
    # tourism
    "tourism/apartment",
    "tourism/apartments",
    "tourism/camp_pitch",
    "tourism/information",
    "tourism/village_sign",
    "tourism/yes",
}
# Maps every raw "<key>/<value>" category we keep to (friendly_name, emoji).
# transform() replaces the raw category with the friendly name and adds the
# emoji as a separate column; it also requires every key listed here to be
# present in the input data, so a misspelt key is reported rather than ignored.
CATEGORY_MAP: dict[str, tuple[str, str]] = {
    # amenity
    "amenity/animal_boarding": ("Animal Boarding", "🐾"),
    "amenity/animal_breeding": ("Animal Breeding", "🐣"),
    "amenity/animal_shelter": ("Animal Shelter", "🏠"),
    "amenity/arts_centre": ("Arts Centre", "🎨"),
    "amenity/bank": ("Bank", "🏦"),
    "amenity/bar": ("Bar", "🍸"),
    "amenity/bicycle_rental": ("Bike Rental", "🚲"),
    "amenity/bicycle_repair_station": ("Bike Repair", "🔧"),
    "amenity/binoculars": ("Public Binoculars", "🔭"),
    "amenity/boat_rental": ("Boat Rental", "⛵"),
    "amenity/boat_storage": ("Boat Storage", "🚢"),
    "amenity/boot_scraper": ("Boot Scraper", "🥾"),
    "amenity/bureau_de_change": ("Currency Exchange", "💱"),
    "amenity/bus_station": ("Bus Station", "🚌"),
    "amenity/cafe": ("Café", "☕"),
    "amenity/car_rental": ("Car Rental", "🚗"),
    "amenity/car_sharing": ("Car Sharing", "🚙"),
    "amenity/car_wash": ("Car Wash", "🧽"),
    "amenity/care_home": ("Care Home", "🏥"),
    "amenity/casino": ("Casino", "🎰"),
    "amenity/charging_station": ("EV Charging", "🔌"),
    "amenity/check_in": ("Check-In Point", "✅"),
    "amenity/childcare": ("Childcare", "👶"),
    "amenity/cinema": ("Cinema", "🎬"),
    "amenity/clinic": ("Clinic", "🩺"),
    "amenity/club": ("Club", "🏛️"),
    "amenity/college": ("College", "🎓"),
    "amenity/community_centre": ("Community Centre", "🤝"),
    "amenity/compressed_air": ("Compressed Air", "💨"),
    "amenity/conference_centre": ("Conference Centre", "📋"),
    "amenity/courthouse": ("Courthouse", "⚖️"),
    "amenity/coworking_space": ("Co-working Space", "💻"),
    "amenity/crematorium": ("Crematorium", "🕯️"),
    "amenity/dancing_school": ("Dance School", "💃"),
    "amenity/dentist": ("Dentist", "🦷"),
    # \u200d is the zero-width joiner that fuses the two code points into a
    # single glyph; it is written as an escape so it cannot be lost silently.
    "amenity/doctors": ("Doctor", "👨\u200d⚕️"),
    "amenity/dojo": ("Dojo", "🥋"),
    "amenity/donation_box": ("Donation Box", "📦"),
    "amenity/dressing_room": ("Dressing Room", "👗"),
    "amenity/drinking_water": ("Drinking Water", "🚰"),
    "amenity/driving_school": ("Driving School", "🚦"),
    "amenity/escooter_rental": ("E-Scooter Rental", "🛴"),
    "amenity/events_venue": ("Events Venue", "🎪"),
    "amenity/fast_food": ("Fast Food", "🍔"),
    "amenity/feeding_place": ("Feeding Place", "🍽️"),
    "amenity/ferry_terminal": ("Ferry Terminal", "⛴️"),
    "amenity/fire_station": ("Fire Station", "🚒"),
    "amenity/food_court": ("Food Court", "🍴"),
    "amenity/fountain": ("Fountain", "⛲"),
    "amenity/fuel": ("Fuel Station", "⛽"),
    "amenity/gambling": ("Gambling", "🎲"),
    "amenity/grave_yard": ("Graveyard", "🪦"),
    "amenity/hall": ("Hall", "🏛️"),
    "amenity/hookah_lounge": ("Hookah Lounge", "💨"),
    "amenity/hospital": ("Hospital", "🏥"),
    "amenity/ice_cream": ("Ice Cream", "🍦"),
    "amenity/internet_cafe": ("Internet Café", "🌐"),
    "amenity/kick-scooter_rental": ("Kick Scooter Rental", "🛴"),
    "amenity/kindergarten": ("Kindergarten", "💒"),
    "amenity/language_school": ("Language School", "🗣️"),
    "amenity/letter_box": ("Letter Box", "📮"),
    "amenity/library": ("Library", "📚"),
    "amenity/loading_dock": ("Loading Dock", "📥"),
    "amenity/lounge": ("Lounge", "🛋️"),
    "amenity/lounger": ("Public Lounger", "🪑"),
    "amenity/marketplace": ("Market", "🛒"),
    "amenity/money_transfer": ("Money Transfer", "💸"),
    "amenity/mounting_block": ("Mounting Block", "🐴"),
    "amenity/music_school": ("Music School", "🎵"),
    "amenity/music_venue": ("Music Venue", "🎶"),
    "amenity/nightclub": ("Nightclub", "🪩"),
    "amenity/nursing_home": ("Nursing Home", "🏠"),
    "amenity/parcel_locker": ("Parcel Locker", "📦"),
    "amenity/payment_terminal": ("Payment Terminal", "💳"),
    "amenity/pharmacy": ("Pharmacy", "💊"),
    "amenity/photo_booth": ("Photo Booth", "📸"),
    "amenity/piano": ("Public Piano", "🎹"),
    "amenity/place_of_worship": ("Place of Worship", "⛪"),
    "amenity/police": ("Police Station", "🚔"),
    "amenity/post_depot": ("Post Depot", "📬"),
    "amenity/post_office": ("Post Office", "🏤"),
    "amenity/prep_school": ("Prep School", "📖"),
    "amenity/pub": ("Pub", "🍺"),
    "amenity/public_bookcase": ("Public Bookcase", "📕"),
    "amenity/public_building": ("Public Building", "🏢"),
    "amenity/reception_desk": ("Reception Desk", "🛎️"),
    "amenity/recycling": ("Recycling", "♻️"),
    "amenity/restaurant": ("Restaurant", "🍽️"),
    "amenity/sanitary_dump_station": ("Sanitary Dump Station", "🚿"),
    "amenity/school": ("School", "🏫"),
    "amenity/scout_hut": ("Scout Hut", "⚜️"),
    "amenity/shelter": ("Shelter", "🛖"),
    "amenity/shower": ("Public Shower", "🚿"),
    "amenity/smoking_area": ("Smoking Area", "🚬"),
    "amenity/social_centre": ("Social Centre", "🏘️"),
    "amenity/social_club": ("Social Club", "🤝"),
    "amenity/social_facility": ("Social Facility", "🫂"),
    "amenity/stripclub": ("Strip Club", "🔞"),
    "amenity/studio": ("Studio", "🎙️"),
    "amenity/table": ("Public Table", "🪑"),
    "amenity/taxi": ("Taxi Stand", "🚕"),
    "amenity/telescope": ("Public Telescope", "🔭"),
    "amenity/theatre": ("Theatre", "🎭"),
    "amenity/ticket_validator": ("Ticket Validator", "🎫"),
    "amenity/townhall": ("Town Hall", "🏛️"),
    "amenity/training": ("Training Centre", "📝"),
    "amenity/trolley_bay": ("Trolley Bay", "🛒"),
    "amenity/university": ("University", "🏫"),
    "amenity/vehicle_inspection": ("Vehicle Inspection", "🔍"),
    "amenity/vending_machine": ("Vending Machine", "🏧"),
    "amenity/veterinary": ("Vet", "🐕"),
    "amenity/washing_machine": ("Washing Machine", "🧺"),
    "amenity/washingline": ("Washing Line", "👕"),
    "amenity/waste_disposal": ("Waste Disposal", "🗑️"),
    "amenity/waste_transfer_station": ("Waste Transfer Station", "🚛"),
    "amenity/water_point": ("Water Point", "💧"),
    "amenity/watering_place": ("Watering Place", "🚰"),
    "amenity/weighbridge": ("Weighbridge", "⚖️"),
    # building
    "building/barn": ("Barn", "🏚️"),
    "building/bunker": ("Bunker", "🏗️"),
    "building/chapel": ("Chapel", "⛪"),
    "building/church": ("Church", "⛪"),
    "building/commercial": ("Commercial Building", "🏬"),
    "building/construction": ("Construction Site", "🚧"),
    "building/farm": ("Farmhouse", "🌾"),
    "building/greenhouse": ("Greenhouse", "🌿"),
    "building/industrial": ("Industrial Building", "🏭"),
    "building/kiosk": ("Kiosk", "🏪"),
    "building/retail": ("Retail Building", "🏬"),
    "building/ruins": ("Ruins", "🏚️"),
    "building/school": ("School Building", "🏫"),
    "building/semidetached_house": ("Semi-Detached House", "🏠"),
    "building/service": ("Service Building", "🔧"),
    "building/university": ("University Building", "🎓"),
    "building/warehouse": ("Warehouse", "🏭"),
    # craft
    "craft/agricultural_engines": ("Agricultural Engines", "🚜"),
    "craft/atelier": ("Atelier", "🎨"),
    "craft/blacksmith": ("Blacksmith", "🔨"),
    "craft/bookbinder": ("Bookbinder", "📖"),
    "craft/brewery": ("Brewery", "🍺"),
    "craft/builder": ("Builder", "🧱"),
    "craft/carpenter": ("Carpenter", "🪚"),
    "craft/caterer": ("Caterer", "🍱"),
    "craft/cleaning": ("Cleaning Service", "🧹"),
    "craft/confectionery": ("Confectioner", "🍬"),
    "craft/distillery": ("Distillery", "🥃"),
    "craft/dressmaker": ("Dressmaker", "👗"),
    "craft/electrician": ("Electrician", "⚡"),
    "craft/electronics_repair": ("Electronics Repair", "🔌"),
    "craft/floorer": ("Flooring Specialist", "🪵"),
    "craft/gardener": ("Gardener", "🌱"),
    "craft/glaziery": ("Glazier", "🪟"),
    "craft/handicraft": ("Handicraft", "✂️"),
    "craft/hvac": ("HVAC", "❄️"),
    "craft/jeweller": ("Jeweller", "💎"),
    "craft/joiner": ("Joiner", "🪚"),
    "craft/key_cutter": ("Key Cutter", "🔑"),
    "craft/locksmith": ("Locksmith", "🔐"),
    "craft/metal_construction": ("Metal Fabrication", "🔩"),
    "craft/painter": ("Painter & Decorator", "🖌️"),
    "craft/photographer": ("Photographer", "📷"),
    "craft/photographic_laboratory": ("Photo Lab", "🖼️"),
    "craft/plumber": ("Plumber", "🔧"),
    "craft/pottery": ("Pottery", "🏺"),
    "craft/printer": ("Printer", "🖨️"),
    "craft/roofer": ("Roofer", "🏠"),
    "craft/sawmill": ("Sawmill", "🪵"),
    "craft/scaffolder": ("Scaffolder", "🏗️"),
    "craft/sculptor": ("Sculptor", "🗿"),
    "craft/shoemaker": ("Shoemaker", "👞"),
    "craft/signmaker": ("Sign Maker", "🪧"),
    "craft/stonemason": ("Stonemason", "🪨"),
    "craft/tailor": ("Tailor", "🧵"),
    "craft/upholsterer": ("Upholsterer", "🛋️"),
    "craft/watchmaker": ("Watchmaker", "⌚"),
    "craft/window_construction": ("Window Fitter", "🪟"),
    "craft/winery": ("Winery", "🍷"),
    "craft/yes": ("Craft Workshop", "🛠️"),
    # healthcare
    "healthcare/alternative": ("Alternative Medicine", "🌿"),
    "healthcare/audiologist": ("Audiologist", "👂"),
    "healthcare/centre": ("Health Centre", "🏥"),
    "healthcare/clinic": ("Health Clinic", "🩺"),
    "healthcare/counselling": ("Counselling", "🧠"),
    "healthcare/dentist": ("Dental Practice", "🦷"),
    # \u200d: explicit zero-width joiner so the emoji renders as one glyph.
    "healthcare/doctor": ("GP Surgery", "👨\u200d⚕️"),
    "healthcare/hospital": ("Hospital", "🏥"),
    "healthcare/laboratory": ("Medical Lab", "🔬"),
    "healthcare/optometrist": ("Optometrist", "👁️"),
    "healthcare/pharmacy": ("Pharmacy", "💊"),
    "healthcare/physiotherapist": ("Physiotherapist", "🏃"),
    "healthcare/podiatrist": ("Podiatrist", "🦶"),
    "healthcare/psychotherapist": ("Psychotherapist", "🧠"),
    "healthcare/rehabilitation": ("Rehabilitation Centre", "♿"),
    "healthcare/vaccination_centre": ("Vaccination Centre", "💉"),
    "healthcare/yes": ("Healthcare Facility", "🏥"),
    # leisure
    "leisure/adult_gaming_centre": ("Adult Gaming Centre", "🎮"),
    "leisure/amusement_arcade": ("Amusement Arcade", "🕹️"),
    "leisure/bandstand": ("Bandstand", "🎺"),
    "leisure/bathing_place": ("Bathing Spot", "🏖️"),
    "leisure/bird_hide": ("Bird Hide", "🐦"),
    "leisure/bowling_alley": ("Bowling Alley", "🎳"),
    "leisure/common": ("Common Land", "🌳"),
    "leisure/dance": ("Dance Venue", "💃"),
    "leisure/dog_park": ("Dog Park", "🐕"),
    "leisure/escape_game": ("Escape Room", "🔓"),
    "leisure/fitness_centre": ("Gym", "🏋️"),
    "leisure/fitness_station": ("Outdoor Gym", "💪"),
    "leisure/garden": ("Garden", "🌷"),
    "leisure/golf_course": ("Golf Course", "⛳"),
    "leisure/hackerspace": ("Hackerspace", "💻"),
    "leisure/horse_riding": ("Horse Riding", "🐎"),
    "leisure/indoor_play": ("Indoor Play Area", "🧒"),
    "leisure/marina": ("Marina", "⚓"),
    "leisure/miniature_golf": ("Mini Golf", "⛳"),
    "leisure/nature_reserve": ("Nature Reserve", "🦔"),
    "leisure/outdoor_seating": ("Outdoor Seating", "🪑"),
    "leisure/park": ("Park", "🌳"),
    "leisure/pitch": ("Sports Pitch", "⚽"),
    "leisure/playground": ("Playground", "🛝"),
    "leisure/sauna": ("Sauna", "🧖"),
    "leisure/slipway": ("Slipway", "🚤"),
    "leisure/social_club": ("Social Club", "🍻"),
    "leisure/sports_centre": ("Sports Centre", "🏟️"),
    "leisure/sports_hall": ("Sports Hall", "🏀"),
    "leisure/swimming_pool": ("Swimming Pool", "🏊"),
    "leisure/tanning_salon": ("Tanning Salon", "☀️"),
    "leisure/track": ("Running Track", "🏃"),
    "leisure/trampoline_park": ("Trampoline Park", "🤸"),
    "leisure/water_park": ("Water Park", "🌊"),
    "leisure/wildlife_hide": ("Wildlife Hide", "🦌"),
    "leisure/yes": ("Leisure Facility", "🎉"),
    # office
    "office/accountant": ("Accountant", "🧮"),
    "office/advertising_agency": ("Advertising Agency", "📢"),
    "office/architect": ("Architect", "📐"),
    "office/association": ("Association", "🏛️"),
    "office/charity": ("Charity", "❤️"),
    "office/construction_company": ("Construction Company", "🏗️"),
    "office/consulting": ("Consulting Firm", "📊"),
    "office/courier": ("Courier Service", "📦"),
    "office/coworking": ("Co-working Space", "💻"),
    "office/design": ("Design Studio", "🎨"),
    "office/diplomatic": ("Diplomatic Office", "🏛️"),
    "office/educational_institution": ("Education Office", "🎓"),
    "office/employment_agency": ("Employment Agency", "💼"),
    "office/energy_supplier": ("Energy Supplier", "⚡"),
    "office/engineer": ("Engineering Firm", "⚙️"),
    "office/estate_agent": ("Estate Agent", "🏠"),
    "office/financial": ("Financial Services", "💰"),
    "office/financial_advisor": ("Financial Advisor", "📈"),
    "office/foundation": ("Foundation", "🏛️"),
    "office/government": ("Government Office", "🏛️"),
    "office/graphic_design": ("Graphic Design", "🖌️"),
    "office/healthcare": ("Healthcare Office", "🏥"),
    "office/home_care": ("Home Care Service", "🏠"),
    "office/insurance": ("Insurance", "🛡️"),
    "office/interior_design": ("Interior Design", "🛋️"),
    "office/it": ("IT Company", "💻"),
    "office/lawyer": ("Lawyer", "⚖️"),
    "office/logistics": ("Logistics", "🚚"),
    "office/marketing": ("Marketing Agency", "📣"),
    "office/mortgage": ("Mortgage Broker", "🏦"),
    "office/moving_company": ("Moving Company", "📦"),
    "office/newspaper": ("Newspaper Office", "📰"),
    "office/ngo": ("NGO", "🌍"),
    "office/notary": ("Notary", "📜"),
    "office/political_party": ("Political Party", "🗳️"),
    "office/politician": ("Politician Office", "🏛️"),
    "office/property_management": ("Property Management", "🏘️"),
    "office/recruitment": ("Recruitment Agency", "👥"),
    "office/religion": ("Religious Office", "✝️"),
    "office/research": ("Research Office", "🔬"),
    "office/security": ("Security Company", "🔒"),
    "office/solicitor": ("Solicitor", "⚖️"),
    "office/surveyor": ("Surveyor", "📏"),
    "office/tax_advisor": ("Tax Advisor", "🧾"),
    "office/taxi": ("Taxi Office", "🚕"),
    "office/telecommunication": ("Telecoms Office", "📡"),
    "office/therapist": ("Therapist", "🧠"),
    "office/travel_agent": ("Travel Agent", "✈️"),
    "office/union": ("Trade Union", "✊"),
    "office/university": ("University Office", "🎓"),
    "office/vacant": ("Vacant Office", "🏚️"),
    "office/web_design": ("Web Design", "🌐"),
    # public_transport
    "public_transport/entrance": ("Transport Entrance", "🚪"),
    "public_transport/platform": ("Platform", "🚉"),
    "public_transport/station": ("Station", "🚉"),
    "public_transport/stop_position": ("Stop", "🚏"),
    # shop
    "shop/accessories": ("Accessories Shop", "👜"),
    "shop/agrarian": ("Farm Supply Shop", "🌾"),
    "shop/alcohol": ("Off-Licence", "🍷"),
    "shop/antiques": ("Antiques Shop", "🏺"),
    "shop/appliance": ("Appliance Shop", "🔌"),
    "shop/art": ("Art Shop", "🎨"),
    "shop/baby_goods": ("Baby Shop", "🍼"),
    "shop/bag": ("Bag Shop", "👜"),
    "shop/bakery": ("Bakery", "🥐"),
    "shop/bathroom": ("Bathroom Shop", "🛁"),
    "shop/bathroom_furnishing": ("Bathroom Furnishings", "🚿"),
    "shop/beauty": ("Beauty Shop", "💄"),
    "shop/bed": ("Bed Shop", "🛏️"),
    "shop/beverages": ("Drinks Shop", "🥤"),
    "shop/bicycle": ("Bike Shop", "🚲"),
    "shop/boat": ("Boat Shop", "⛵"),
    "shop/bookmaker": ("Bookmaker", "🏇"),
    "shop/books": ("Bookshop", "📚"),
    "shop/boutique": ("Boutique", "👗"),
    "shop/building_materials": ("Building Materials", "🧱"),
    "shop/butcher": ("Butcher", "🥩"),
    "shop/camera": ("Camera Shop", "📷"),
    "shop/candles": ("Candle Shop", "🕯️"),
    "shop/car": ("Car Dealership", "🚗"),
    "shop/car;car_repair": ("Car Sales & Repair", "🚗"),
    "shop/car_parts": ("Car Parts", "🔩"),
    "shop/car_repair": ("Car Repair", "🔧"),
    "shop/caravan": ("Caravan Dealer", "🚐"),
    "shop/carpet": ("Carpet Shop", "🧶"),
    "shop/catalogue": ("Catalogue Shop", "📋"),
    "shop/charity": ("Charity Shop", "❤️"),
    "shop/cheese": ("Cheese Shop", "🧀"),
    "shop/chemist": ("Chemist", "🧪"),
    "shop/chocolate": ("Chocolate Shop", "🍫"),
    "shop/clothes": ("Clothes Shop", "👕"),
    "shop/coffee": ("Coffee Shop", "☕"),
    "shop/collector": ("Collector Shop", "🏆"),
    "shop/computer": ("Computer Shop", "🖥️"),
    "shop/confectionery": ("Sweet Shop", "🍬"),
    "shop/convenience": ("Convenience Store", "🏪"),
    "shop/copyshop": ("Copy Shop", "🖨️"),
    "shop/cosmetics": ("Cosmetics Shop", "💅"),
    "shop/country_store": ("Country Store", "🏡"),
    "shop/craft": ("Craft Shop", "✂️"),
    "shop/curtain": ("Curtain Shop", "🪟"),
    "shop/dairy": ("Dairy Shop", "🥛"),
    "shop/deli": ("Delicatessen", "🧆"),
    "shop/department_store": ("Department Store", "🏬"),
    "shop/discount": ("Discount Store", "💲"),
    "shop/doityourself": ("DIY Store", "🔨"),
    "shop/doors": ("Door Shop", "🚪"),
    "shop/dry_cleaning": ("Dry Cleaner", "👔"),
    "shop/e-cigarette": ("Vape Shop", "💨"),
    "shop/electrical": ("Electrical Shop", "⚡"),
    "shop/electronics": ("Electronics Shop", "📱"),
    "shop/erotic": ("Adult Shop", "🔞"),
    "shop/esoteric": ("Esoteric Shop", "🔮"),
    "shop/estate_agent": ("Estate Agent", "🏠"),
    "shop/fabric": ("Fabric Shop", "🧵"),
    "shop/fan": ("Fan Shop", "🏅"),
    "shop/farm": ("Farm Shop", "🥕"),
    "shop/fashion_accessories": ("Fashion Accessories", "👒"),
    "shop/fireplace": ("Fireplace Shop", "🔥"),
    "shop/fishing": ("Fishing Shop", "🎣"),
    "shop/flooring": ("Flooring Shop", "🪵"),
    "shop/florist": ("Florist", "💐"),
    "shop/food": ("Food Shop", "🍞"),
    "shop/frame": ("Framing Shop", "🖼️"),
    "shop/frozen_food": ("Frozen Food Shop", "🧊"),
    "shop/fuel": ("Fuel Shop", "⛽"),
    "shop/funeral_directors": ("Funeral Director", "⚰️"),
    "shop/furniture": ("Furniture Shop", "🪑"),
    "shop/games": ("Games Shop", "🎮"),
    "shop/garden_centre": ("Garden Centre", "🌻"),
    "shop/gas": ("Gas Shop", "🔥"),
    "shop/general": ("General Store", "🏪"),
    "shop/gift": ("Gift Shop", "🎁"),
    "shop/glaziery": ("Glazier", "🪟"),
    "shop/greengrocer": ("Greengrocer", "🥬"),
    "shop/grocery": ("Grocery Shop", "🛒"),
    "shop/haberdashery": ("Haberdashery", "🧵"),
    "shop/hairdresser": ("Hairdresser", "💇"),
    "shop/hairdresser_supply": ("Hairdresser Supply", "💇"),
    "shop/hardware": ("Hardware Shop", "🔩"),
    "shop/health": ("Health Shop", "🌿"),
    "shop/health_food": ("Health Food Shop", "🥗"),
    "shop/hearing_aids": ("Hearing Aid Shop", "👂"),
    "shop/herbalist": ("Herbalist", "🌿"),
    "shop/hifi": ("Hi-Fi Shop", "🔊"),
    "shop/household": ("Household Shop", "🏠"),
    "shop/household_linen": ("Linen Shop", "🛏️"),
    "shop/houseware": ("Houseware Shop", "🍳"),
    "shop/ice_cream": ("Ice Cream Shop", "🍦"),
    "shop/interior_decoration": ("Interior Decoration", "🖼️"),
    "shop/jewelry": ("Jewellery Shop", "💍"),
    "shop/kiosk": ("Kiosk", "🏪"),
    "shop/kitchen": ("Kitchen Shop", "🍳"),
    "shop/laundry": ("Laundry", "🧺"),
    "shop/leather": ("Leather Shop", "🧳"),
    "shop/lighting": ("Lighting Shop", "💡"),
    "shop/locksmith": ("Locksmith", "🔐"),
    "shop/mall": ("Shopping Centre", "🏬"),
    "shop/massage": ("Massage Parlour", "💆"),
    "shop/medical_supply": ("Medical Supply", "🩺"),
    "shop/military_surplus": ("Military Surplus", "🎖️"),
    "shop/mobile_phone": ("Mobile Phone Shop", "📱"),
    "shop/mobile_phone_accessories": ("Phone Accessories", "📱"),
    "shop/mobility": ("Mobility Shop", "♿"),
    "shop/mobility_scooter": ("Mobility Scooter Shop", "🦽"),
    "shop/model": ("Model Shop", "✈️"),
    "shop/money_lender": ("Money Lender", "💰"),
    "shop/motorcycle": ("Motorcycle Shop", "🏍️"),
    "shop/motorcycle_repair": ("Motorcycle Repair", "🔧"),
    "shop/music": ("Music Shop", "🎵"),
    "shop/musical_instrument": ("Musical Instrument Shop", "🎸"),
    "shop/newsagent": ("Newsagent", "📰"),
    "shop/nutrition_supplements": ("Nutrition Shop", "💪"),
    "shop/optician": ("Optician", "👓"),
    "shop/outdoor": ("Outdoor Shop", "🏕️"),
    "shop/outpost": ("Outpost", "📦"),
    "shop/paint": ("Paint Shop", "🎨"),
    "shop/party": ("Party Shop", "🎈"),
    "shop/pastry": ("Pastry Shop", "🥐"),
    "shop/pawnbroker": ("Pawnbroker", "💰"),
    "shop/perfumery": ("Perfumery", "🌸"),
    "shop/pet": ("Pet Shop", "🐾"),
    "shop/pet_grooming": ("Pet Grooming", "🐩"),
    "shop/photo": ("Photo Shop", "📸"),
    "shop/piercing": ("Piercing Studio", "💎"),
    "shop/plant_hire": ("Plant Hire", "🚜"),
    "shop/pottery": ("Pottery Shop", "🏺"),
    "shop/printer_ink": ("Ink & Toner Shop", "🖨️"),
    "shop/printing": ("Print Shop", "🖨️"),
    "shop/psychic": ("Psychic", "🔮"),
    "shop/pyrotechnics": ("Fireworks Shop", "🎆"),
    "shop/religion": ("Religious Shop", "✝️"),
    "shop/rental": ("Rental Shop", "🔑"),
    "shop/repair": ("Repair Shop", "🔧"),
    "shop/scuba_diving": ("Scuba Diving Shop", "🤿"),
    "shop/seafood": ("Fishmonger", "🐟"),
    "shop/second_hand": ("Second-Hand Shop", "♻️"),
    "shop/security": ("Security Shop", "🔒"),
    "shop/sewing": ("Sewing Shop", "🪡"),
    "shop/shoe_repair": ("Shoe Repair", "👞"),
    "shop/shoes": ("Shoe Shop", "👟"),
    "shop/sports": ("Sports Shop", "⚽"),
    "shop/stationery": ("Stationery Shop", "✏️"),
    "shop/storage_rental": ("Self Storage", "📦"),
    "shop/supermarket": ("Supermarket", "🛒"),
    "shop/swimming_pool": ("Pool Supplies", "🏊"),
    "shop/tailor": ("Tailor", "🧵"),
    "shop/tattoo": ("Tattoo Studio", "🖋️"),
    "shop/taxi": ("Taxi Booking", "🚕"),
    "shop/tea": ("Tea Shop", "🫖"),
    "shop/telecommunication": ("Telecoms Shop", "📡"),
    "shop/ticket": ("Ticket Office", "🎫"),
    "shop/tiles": ("Tile Shop", "🔲"),
    "shop/tobacco": ("Tobacconist", "🚬"),
    "shop/tool_hire": ("Tool Hire", "🧰"),
    "shop/toys": ("Toy Shop", "🧸"),
    "shop/trade": ("Trade Supplier", "🏭"),
    "shop/travel_agency": ("Travel Agency", "✈️"),
    "shop/trophy": ("Trophy Shop", "🏆"),
    "shop/tyres": ("Tyre Shop", "🛞"),
    "shop/vacant": ("Vacant Shop", "🏚️"),
    "shop/variety_store": ("Variety Store", "🏪"),
    "shop/video": ("Video Shop", "📀"),
    "shop/video_games": ("Video Game Shop", "🎮"),
    "shop/watches": ("Watch Shop", "⌚"),
    "shop/water_sports": ("Water Sports Shop", "🏄"),
    "shop/weapons": ("Weapons Shop", "🗡️"),
    "shop/wedding": ("Wedding Shop", "💒"),
    "shop/wholesale": ("Wholesaler", "📦"),
    "shop/wigs": ("Wig Shop", "💇"),
    "shop/window_blind": ("Blinds Shop", "🪟"),
    "shop/windows": ("Window Shop", "🪟"),
    "shop/wine": ("Wine Shop", "🍷"),
    "shop/wool": ("Wool Shop", "🧶"),
    "shop/yes": ("Shop", "🛍️"),
    # tourism
    "tourism/artwork": ("Public Artwork", "🎨"),
    "tourism/attraction": ("Tourist Attraction", "📸"),
    "tourism/camp_site": ("Campsite", "⛺"),
    "tourism/caravan_site": ("Caravan Site", "🚐"),
    "tourism/chalet": ("Chalet", "🏔️"),
    "tourism/gallery": ("Gallery", "🖼️"),
    "tourism/guest_house": ("Guest House", "🏡"),
    "tourism/hostel": ("Hostel", "🛏️"),
    "tourism/hotel": ("Hotel", "🏨"),
    "tourism/motel": ("Motel", "🏨"),
    "tourism/museum": ("Museum", "🏛️"),
    "tourism/picnic_site": ("Picnic Site", "🧺"),
    "tourism/preserved_railway": ("Heritage Railway", "🚂"),
    "tourism/theme_park": ("Theme Park", "🎢"),
    "tourism/viewpoint": ("Viewpoint", "🔭"),
    "tourism/zoo": ("Zoo", "🦁"),
}
def transform(input_path: Path) -> pl.LazyFrame:
    """Filter raw POIs and attach friendly category names and emojis.

    The parquet file is scanned lazily. Its distinct ``category`` values are
    checked against ``DROP_CATEGORIES`` and ``CATEGORY_MAP`` before any rows
    are transformed, so a category that was never classified (or a misspelt
    key in ``CATEGORY_MAP``) fails loudly instead of leaking into the output.

    Args:
        input_path: Parquet file of raw POIs; it must have a ``category``
            column.

    Returns:
        A lazy frame without the rows whose category is in
        ``DROP_CATEGORIES``, where ``category`` holds the friendly name and a
        new ``emoji`` column holds the matching emoji.

    Raises:
        ValueError: If the data contains a category that is neither dropped
            nor mapped, if ``CATEGORY_MAP`` has a key that does not occur in
            the data, or if any mapping has an empty friendly name or emoji.
    """
    lf = pl.scan_parquet(input_path)

    # Only the distinct category values are materialised here, not the rows.
    present = set(
        lf.select("category").unique().collect(engine="streaming").to_series().to_list()
    )

    # Every category that survives the drop filter must have a mapping.
    unmapped = [
        cat for cat in present
        if cat not in DROP_CATEGORIES and cat not in CATEGORY_MAP
    ]
    if unmapped:
        # key=str keeps the report sortable when a null category (None) sits
        # next to string ones; plain sorted() would raise TypeError instead.
        raise ValueError(
            f"Categories missing from CATEGORY_MAP: {sorted(unmapped, key=str)}"
        )

    # Every CATEGORY_MAP key must actually occur in the data (catches typos).
    mapped_but_absent = [cat for cat in CATEGORY_MAP if cat not in present]
    if mapped_but_absent:
        raise ValueError(
            f"CATEGORY_MAP contains categories not in data: {sorted(mapped_but_absent)}"
        )

    # Drop unwanted categories.
    lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))

    # Defensive: a blank friendly name or emoji would pass replace_strict
    # unnoticed, so reject it explicitly.
    missing_names = [key for key, (name, _) in CATEGORY_MAP.items() if not name]
    if missing_names:
        raise ValueError(f"Empty friendly names for: {missing_names}")
    missing_emojis = [key for key, (_, emoji) in CATEGORY_MAP.items() if not emoji]
    if missing_emojis:
        raise ValueError(f"Empty emojis for: {missing_emojis}")

    name_mapping = {key: name for key, (name, _) in CATEGORY_MAP.items()}
    emoji_mapping = {key: emoji for key, (_, emoji) in CATEGORY_MAP.items()}

    # Both expressions live in one with_columns call, so each one reads the
    # raw category value rather than the friendly name written by the other.
    raw_category = pl.col("category")
    return lf.with_columns(
        raw_category.replace_strict(name_mapping).alias("category"),
        raw_category.replace_strict(emoji_mapping).alias("emoji"),
    )
def main() -> None:
    """Run the POI transform from the command line.

    Parses ``--input`` and ``--output`` (both required paths), applies
    ``transform`` to the input parquet file, writes the result to the output
    path, then prints the output file size, the number of POIs and a
    per-category count sorted from most to least frequent.
    """
    parser = argparse.ArgumentParser(
        description="Transform raw POIs to filtered version with friendly names"
    )
    parser.add_argument(
        "--input", type=Path, required=True, help="Raw POIs parquet file"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output filtered POIs parquet file"
    )
    args = parser.parse_args()

    # Materialise once: the frame is both written out and summarised below.
    df = transform(args.input).collect(engine="streaming")

    df.write_parquet(args.output)

    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB, {len(df):,} POIs)")
    print(f"\nCategories ({df['category'].n_unique()}):")
    counts = df.group_by("category", "emoji").len().sort("len", descending=True)
    for row in counts.iter_rows(named=True):
        print(f" {row['emoji']} {row['category']}: {row['len']:,}")
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()