import argparse from pathlib import Path import polars as pl DROP_CATEGORIES = { "amenity/advice", "amenity/atm", "amenity/bbq", "amenity/bench", "amenity/bicycle_parking", "amenity/clock", "amenity/fixme", "amenity/grit_bin", "amenity/hunting_stand", "amenity/motorcycle_parking", "amenity/notice_board", "amenity/parking", "amenity/parking_entrance", "amenity/parking_space", "amenity/post_box", "amenity/telephone", "amenity/toilets", "amenity/vacuum_cleaner", "amenity/waste_basket", "building/air_shaft", "building/apartments", "building/detached", "building/entrance", "building/entry", "building/garage", "building/garages", "building/house", "building/hut", "building/no", "building/office", "building/public", "building/residential", "building/roof", "building/shed", "building/terrace", "building/yes", "emergency/access_point", "emergency/ambulance_station", "emergency/assembly_point", "emergency/bleed_control_kit", "emergency/defibrillator", "emergency/designated", "emergency/dry_riser_inlet", "emergency/emergency_ward_entrance", "emergency/fire_alarm_box", "emergency/fire_extinguisher", "emergency/fire_hydrant", "emergency/fire_service_inlet", "emergency/first_aid_kit", "emergency/life_ring", "emergency/lifeguard", "emergency/no", "emergency/phone", "emergency/rescue_equipment", "emergency/siren", "emergency/throw_bag", "emergency/water_rescue", "emergency/yes", "leisure/firepit", "leisure/fishing", "leisure/picnic_table", "office/company", "office/yes", "tourism/apartment", "tourism/apartments", "tourism/camp_pitch", "tourism/information", "tourism/village_sign", "tourism/yes", } # (friendly_name, emoji) for every category we keep CATEGORY_MAP: dict[str, tuple[str, str]] = { # amenity "amenity/animal_boarding": ("Animal Boarding", "๐Ÿพ"), "amenity/animal_breeding": ("Animal Breeding", "๐Ÿฃ"), "amenity/animal_shelter": ("Animal Shelter", "๐Ÿ "), "amenity/arts_centre": ("Arts Centre", "๐ŸŽจ"), "amenity/bank": ("Bank", "๐Ÿฆ"), "amenity/bar": ("Bar", "๐Ÿธ"), "amenity/bicycle_rental": ("Bike Rental", "๐Ÿšฒ"), "amenity/bicycle_repair_station": ("Bike Repair", "๐Ÿ”ง"), "amenity/binoculars": ("Public Binoculars", "๐Ÿ”ญ"), "amenity/boat_rental": ("Boat Rental", "โ›ต"), "amenity/boat_storage": ("Boat Storage", "๐Ÿšข"), "amenity/boot_scraper": ("Boot Scraper", "๐Ÿฅพ"), "amenity/bureau_de_change": ("Currency Exchange", "๐Ÿ’ฑ"), "amenity/bus_station": ("Bus Station", "๐ŸšŒ"), "amenity/cafe": ("Cafรฉ", "โ˜•"), "amenity/car_rental": ("Car Rental", "๐Ÿš—"), "amenity/car_sharing": ("Car Sharing", "๐Ÿš™"), "amenity/car_wash": ("Car Wash", "๐Ÿงฝ"), "amenity/care_home": ("Care Home", "๐Ÿฅ"), "amenity/casino": ("Casino", "๐ŸŽฐ"), "amenity/charging_station": ("EV Charging", "๐Ÿ”Œ"), "amenity/check_in": ("Check-In Point", "โœ…"), "amenity/childcare": ("Childcare", "๐Ÿ‘ถ"), "amenity/cinema": ("Cinema", "๐ŸŽฌ"), "amenity/clinic": ("Clinic", "๐Ÿฉบ"), "amenity/club": ("Club", "๐Ÿ›๏ธ"), "amenity/college": ("College", "๐ŸŽ“"), "amenity/community_centre": ("Community Centre", "๐Ÿค"), "amenity/compressed_air": ("Compressed Air", "๐Ÿ’จ"), "amenity/conference_centre": ("Conference Centre", "๐Ÿ“‹"), "amenity/courthouse": ("Courthouse", "โš–๏ธ"), "amenity/coworking_space": ("Co-working Space", "๐Ÿ’ป"), "amenity/crematorium": ("Crematorium", "๐Ÿ•ฏ๏ธ"), "amenity/dancing_school": ("Dance School", "๐Ÿ’ƒ"), "amenity/dentist": ("Dentist", "๐Ÿฆท"), "amenity/doctors": ("Doctor", "๐Ÿ‘จโ€โš•๏ธ"), "amenity/dojo": ("Dojo", "๐Ÿฅ‹"), "amenity/donation_box": ("Donation Box", "๐Ÿ“ฆ"), "amenity/dressing_room": ("Dressing Room", "๐Ÿ‘—"), "amenity/drinking_water": ("Drinking Water", "๐Ÿšฐ"), "amenity/driving_school": ("Driving School", "๐Ÿšฆ"), "amenity/escooter_rental": ("E-Scooter Rental", "๐Ÿ›ด"), "amenity/events_venue": ("Events Venue", "๐ŸŽช"), "amenity/fast_food": ("Fast Food", "๐Ÿ”"), "amenity/feeding_place": ("Feeding Place", "๐Ÿฝ๏ธ"), "amenity/ferry_terminal": ("Ferry Terminal", "โ›ด๏ธ"), "amenity/fire_station": ("Fire Station", "๐Ÿš’"), "amenity/food_court": ("Food Court", "๐Ÿด"), "amenity/fountain": ("Fountain", "โ›ฒ"), "amenity/fuel": ("Fuel Station", "โ›ฝ"), "amenity/gambling": ("Gambling", "๐ŸŽฒ"), "amenity/grave_yard": ("Graveyard", "๐Ÿชฆ"), "amenity/hall": ("Hall", "๐Ÿ›๏ธ"), "amenity/hookah_lounge": ("Hookah Lounge", "๐Ÿ’จ"), "amenity/hospital": ("Hospital", "๐Ÿฅ"), "amenity/ice_cream": ("Ice Cream", "๐Ÿฆ"), "amenity/internet_cafe": ("Internet Cafรฉ", "๐ŸŒ"), "amenity/kick-scooter_rental": ("Kick Scooter Rental", "๐Ÿ›ด"), "amenity/kindergarten": ("Kindergarten", "๐Ÿ’’"), "amenity/language_school": ("Language School", "๐Ÿ—ฃ๏ธ"), "amenity/letter_box": ("Letter Box", "๐Ÿ“ฎ"), "amenity/library": ("Library", "๐Ÿ“š"), "amenity/loading_dock": ("Loading Dock", "๐Ÿ“ฅ"), "amenity/lounge": ("Lounge", "๐Ÿ›‹๏ธ"), "amenity/lounger": ("Public Lounger", "๐Ÿช‘"), "amenity/marketplace": ("Market", "๐Ÿ›’"), "amenity/money_transfer": ("Money Transfer", "๐Ÿ’ธ"), "amenity/mounting_block": ("Mounting Block", "๐Ÿด"), "amenity/music_school": ("Music School", "๐ŸŽต"), "amenity/music_venue": ("Music Venue", "๐ŸŽถ"), "amenity/nightclub": ("Nightclub", "๐Ÿชฉ"), "amenity/nursing_home": ("Nursing Home", "๐Ÿ "), "amenity/parcel_locker": ("Parcel Locker", "๐Ÿ“ฆ"), "amenity/payment_terminal": ("Payment Terminal", "๐Ÿ’ณ"), "amenity/pharmacy": ("Pharmacy", "๐Ÿ’Š"), "amenity/photo_booth": ("Photo Booth", "๐Ÿ“ธ"), "amenity/piano": ("Public Piano", "๐ŸŽน"), "amenity/place_of_worship": ("Place of Worship", "โ›ช"), "amenity/police": ("Police Station", "๐Ÿš”"), "amenity/post_depot": ("Post Depot", "๐Ÿ“ฌ"), "amenity/post_office": ("Post Office", "๐Ÿค"), "amenity/prep_school": ("Prep School", "๐Ÿ“–"), "amenity/pub": ("Pub", "๐Ÿบ"), "amenity/public_bookcase": ("Public Bookcase", "๐Ÿ“•"), "amenity/public_building": ("Public Building", "๐Ÿข"), "amenity/reception_desk": ("Reception Desk", "๐Ÿ›Ž๏ธ"), "amenity/recycling": ("Recycling", "โ™ป๏ธ"), "amenity/restaurant": ("Restaurant", "๐Ÿฝ๏ธ"), "amenity/sanitary_dump_station": ("Sanitary Dump Station", "๐Ÿšฟ"), "amenity/school": ("School", "๐Ÿซ"), "amenity/scout_hut": ("Scout Hut", "โšœ๏ธ"), "amenity/shelter": ("Shelter", "๐Ÿ›–"), "amenity/shower": ("Public Shower", "๐Ÿšฟ"), "amenity/smoking_area": ("Smoking Area", "๐Ÿšฌ"), "amenity/social_centre": ("Social Centre", "๐Ÿ˜๏ธ"), "amenity/social_club": ("Social Club", "๐Ÿค"), "amenity/social_facility": ("Social Facility", "๐Ÿซ‚"), "amenity/stripclub": ("Strip Club", "๐Ÿ”ž"), "amenity/studio": ("Studio", "๐ŸŽ™๏ธ"), "amenity/table": ("Public Table", "๐Ÿช‘"), "amenity/taxi": ("Taxi Stand", "๐Ÿš•"), "amenity/telescope": ("Public Telescope", "๐Ÿ”ญ"), "amenity/theatre": ("Theatre", "๐ŸŽญ"), "amenity/ticket_validator": ("Ticket Validator", "๐ŸŽซ"), "amenity/townhall": ("Town Hall", "๐Ÿ›๏ธ"), "amenity/training": ("Training Centre", "๐Ÿ“"), "amenity/trolley_bay": ("Trolley Bay", "๐Ÿ›’"), "amenity/university": ("University", "๐Ÿซ"), "amenity/vehicle_inspection": ("Vehicle Inspection", "๐Ÿ”"), "amenity/vending_machine": ("Vending Machine", "๐Ÿง"), "amenity/veterinary": ("Vet", "๐Ÿ•"), "amenity/washing_machine": ("Washing Machine", "๐Ÿงบ"), "amenity/washingline": ("Washing Line", "๐Ÿ‘•"), "amenity/waste_disposal": ("Waste Disposal", "๐Ÿ—‘๏ธ"), "amenity/waste_transfer_station": ("Waste Transfer Station", "๐Ÿš›"), "amenity/water_point": ("Water Point", "๐Ÿ’ง"), "amenity/watering_place": ("Watering Place", "๐Ÿšฐ"), "amenity/weighbridge": ("Weighbridge", "โš–๏ธ"), # building "building/barn": ("Barn", "๐Ÿš๏ธ"), "building/bunker": ("Bunker", "๐Ÿ—๏ธ"), "building/chapel": ("Chapel", "โ›ช"), "building/church": ("Church", "โ›ช"), "building/commercial": ("Commercial Building", "๐Ÿฌ"), "building/construction": ("Construction Site", "๐Ÿšง"), "building/farm": ("Farmhouse", "๐ŸŒพ"), "building/greenhouse": ("Greenhouse", "๐ŸŒฟ"), "building/industrial": ("Industrial Building", "๐Ÿญ"), "building/kiosk": ("Kiosk", "๐Ÿช"), "building/retail": ("Retail Building", "๐Ÿฌ"), "building/ruins": ("Ruins", "๐Ÿš๏ธ"), "building/school": ("School Building", "๐Ÿซ"), "building/semidetached_house": ("Semi-Detached House", "๐Ÿ "), "building/service": ("Service Building", "๐Ÿ”ง"), "building/university": ("University Building", "๐ŸŽ“"), "building/warehouse": ("Warehouse", "๐Ÿญ"), # craft "craft/agricultural_engines": ("Agricultural Engines", "๐Ÿšœ"), "craft/atelier": ("Atelier", "๐ŸŽจ"), "craft/blacksmith": ("Blacksmith", "๐Ÿ”จ"), "craft/bookbinder": ("Bookbinder", "๐Ÿ“–"), "craft/brewery": ("Brewery", "๐Ÿบ"), "craft/builder": ("Builder", "๐Ÿงฑ"), "craft/carpenter": ("Carpenter", "๐Ÿชš"), "craft/caterer": ("Caterer", "๐Ÿฑ"), "craft/cleaning": ("Cleaning Service", "๐Ÿงน"), "craft/confectionery": ("Confectioner", "๐Ÿฌ"), "craft/distillery": ("Distillery", "๐Ÿฅƒ"), "craft/dressmaker": ("Dressmaker", "๐Ÿ‘—"), "craft/electrician": ("Electrician", "โšก"), "craft/electronics_repair": ("Electronics Repair", "๐Ÿ”Œ"), "craft/floorer": ("Flooring Specialist", "๐Ÿชต"), "craft/gardener": ("Gardener", "๐ŸŒฑ"), "craft/glaziery": ("Glazier", "๐ŸชŸ"), "craft/handicraft": ("Handicraft", "โœ‚๏ธ"), "craft/hvac": ("HVAC", "โ„๏ธ"), "craft/jeweller": ("Jeweller", "๐Ÿ’Ž"), "craft/joiner": ("Joiner", "๐Ÿชš"), "craft/key_cutter": ("Key Cutter", "๐Ÿ”‘"), "craft/locksmith": ("Locksmith", "๐Ÿ”"), "craft/metal_construction": ("Metal Fabrication", "๐Ÿ”ฉ"), "craft/painter": ("Painter & Decorator", "๐Ÿ–Œ๏ธ"), "craft/photographer": ("Photographer", "๐Ÿ“ท"), "craft/photographic_laboratory": ("Photo Lab", "๐Ÿ–ผ๏ธ"), "craft/plumber": ("Plumber", "๐Ÿ”ง"), "craft/pottery": ("Pottery", "๐Ÿบ"), "craft/printer": ("Printer", "๐Ÿ–จ๏ธ"), "craft/roofer": ("Roofer", "๐Ÿ "), "craft/sawmill": ("Sawmill", "๐Ÿชต"), "craft/scaffolder": ("Scaffolder", "๐Ÿ—๏ธ"), "craft/sculptor": ("Sculptor", "๐Ÿ—ฟ"), "craft/shoemaker": ("Shoemaker", "๐Ÿ‘ž"), "craft/signmaker": ("Sign Maker", "๐Ÿชง"), "craft/stonemason": ("Stonemason", "๐Ÿชจ"), "craft/tailor": ("Tailor", "๐Ÿงต"), "craft/upholsterer": ("Upholsterer", "๐Ÿ›‹๏ธ"), "craft/watchmaker": ("Watchmaker", "โŒš"), "craft/window_construction": ("Window Fitter", "๐ŸชŸ"), "craft/winery": ("Winery", "๐Ÿท"), "craft/yes": ("Craft Workshop", "๐Ÿ› ๏ธ"), # healthcare "healthcare/alternative": ("Alternative Medicine", "๐ŸŒฟ"), "healthcare/audiologist": ("Audiologist", "๐Ÿ‘‚"), "healthcare/centre": ("Health Centre", "๐Ÿฅ"), "healthcare/clinic": ("Health Clinic", "๐Ÿฉบ"), "healthcare/counselling": ("Counselling", "๐Ÿง "), "healthcare/dentist": ("Dental Practice", "๐Ÿฆท"), "healthcare/doctor": ("GP Surgery", "๐Ÿ‘จโ€โš•๏ธ"), "healthcare/hospital": ("Hospital", "๐Ÿฅ"), "healthcare/laboratory": ("Medical Lab", "๐Ÿ”ฌ"), "healthcare/optometrist": ("Optometrist", "๐Ÿ‘๏ธ"), "healthcare/pharmacy": ("Pharmacy", "๐Ÿ’Š"), "healthcare/physiotherapist": ("Physiotherapist", "๐Ÿƒ"), "healthcare/podiatrist": ("Podiatrist", "๐Ÿฆถ"), "healthcare/psychotherapist": ("Psychotherapist", "๐Ÿง "), "healthcare/rehabilitation": ("Rehabilitation Centre", "โ™ฟ"), "healthcare/vaccination_centre": ("Vaccination Centre", "๐Ÿ’‰"), "healthcare/yes": ("Healthcare Facility", "๐Ÿฅ"), # leisure "leisure/adult_gaming_centre": ("Adult Gaming Centre", "๐ŸŽฎ"), "leisure/amusement_arcade": ("Amusement Arcade", "๐Ÿ•น๏ธ"), "leisure/bandstand": ("Bandstand", "๐ŸŽบ"), "leisure/bathing_place": ("Bathing Spot", "๐Ÿ–๏ธ"), "leisure/bird_hide": ("Bird Hide", "๐Ÿฆ"), "leisure/bowling_alley": ("Bowling Alley", "๐ŸŽณ"), "leisure/common": ("Common Land", "๐ŸŒณ"), "leisure/dance": ("Dance Venue", "๐Ÿ’ƒ"), "leisure/dog_park": ("Dog Park", "๐Ÿ•"), "leisure/escape_game": ("Escape Room", "๐Ÿ”“"), "leisure/fitness_centre": ("Gym", "๐Ÿ‹๏ธ"), "leisure/fitness_station": ("Outdoor Gym", "๐Ÿ’ช"), "leisure/garden": ("Garden", "๐ŸŒท"), "leisure/golf_course": ("Golf Course", "โ›ณ"), "leisure/hackerspace": ("Hackerspace", "๐Ÿ’ป"), "leisure/horse_riding": ("Horse Riding", "๐ŸŽ"), "leisure/indoor_play": ("Indoor Play Area", "๐Ÿง’"), "leisure/marina": ("Marina", "โš“"), "leisure/miniature_golf": ("Mini Golf", "โ›ณ"), "leisure/nature_reserve": ("Nature Reserve", "๐Ÿฆ”"), "leisure/outdoor_seating": ("Outdoor Seating", "๐Ÿช‘"), "leisure/park": ("Park", "๐ŸŒณ"), "leisure/pitch": ("Sports Pitch", "โšฝ"), "leisure/playground": ("Playground", "๐Ÿ›"), "leisure/sauna": ("Sauna", "๐Ÿง–"), "leisure/slipway": ("Slipway", "๐Ÿšค"), "leisure/social_club": ("Social Club", "๐Ÿป"), "leisure/sports_centre": ("Sports Centre", "๐ŸŸ๏ธ"), "leisure/sports_hall": ("Sports Hall", "๐Ÿ€"), "leisure/swimming_pool": ("Swimming Pool", "๐ŸŠ"), "leisure/tanning_salon": ("Tanning Salon", "โ˜€๏ธ"), "leisure/track": ("Running Track", "๐Ÿƒ"), "leisure/trampoline_park": ("Trampoline Park", "๐Ÿคธ"), "leisure/water_park": ("Water Park", "๐ŸŒŠ"), "leisure/wildlife_hide": ("Wildlife Hide", "๐ŸฆŒ"), "leisure/yes": ("Leisure Facility", "๐ŸŽ‰"), # office "office/accountant": ("Accountant", "๐Ÿงฎ"), "office/advertising_agency": ("Advertising Agency", "๐Ÿ“ข"), "office/architect": ("Architect", "๐Ÿ“"), "office/association": ("Association", "๐Ÿ›๏ธ"), "office/charity": ("Charity", "โค๏ธ"), "office/construction_company": ("Construction Company", "๐Ÿ—๏ธ"), "office/consulting": ("Consulting Firm", "๐Ÿ“Š"), "office/courier": ("Courier Service", "๐Ÿ“ฆ"), "office/coworking": ("Co-working Space", "๐Ÿ’ป"), "office/design": ("Design Studio", "๐ŸŽจ"), "office/diplomatic": ("Diplomatic Office", "๐Ÿ›๏ธ"), "office/educational_institution": ("Education Office", "๐ŸŽ“"), "office/employment_agency": ("Employment Agency", "๐Ÿ’ผ"), "office/energy_supplier": ("Energy Supplier", "โšก"), "office/engineer": ("Engineering Firm", "โš™๏ธ"), "office/estate_agent": ("Estate Agent", "๐Ÿ "), "office/financial": ("Financial Services", "๐Ÿ’ฐ"), "office/financial_advisor": ("Financial Advisor", "๐Ÿ“ˆ"), "office/foundation": ("Foundation", "๐Ÿ›๏ธ"), "office/government": ("Government Office", "๐Ÿ›๏ธ"), "office/graphic_design": ("Graphic Design", "๐Ÿ–Œ๏ธ"), "office/healthcare": ("Healthcare Office", "๐Ÿฅ"), "office/home_care": ("Home Care Service", "๐Ÿ "), "office/insurance": ("Insurance", "๐Ÿ›ก๏ธ"), "office/interior_design": ("Interior Design", "๐Ÿ›‹๏ธ"), "office/it": ("IT Company", "๐Ÿ’ป"), "office/lawyer": ("Lawyer", "โš–๏ธ"), "office/logistics": ("Logistics", "๐Ÿšš"), "office/marketing": ("Marketing Agency", "๐Ÿ“ฃ"), "office/mortgage": ("Mortgage Broker", "๐Ÿฆ"), "office/moving_company": ("Moving Company", "๐Ÿ“ฆ"), "office/newspaper": ("Newspaper Office", "๐Ÿ“ฐ"), "office/ngo": ("NGO", "๐ŸŒ"), "office/notary": ("Notary", "๐Ÿ“œ"), "office/political_party": ("Political Party", "๐Ÿ—ณ๏ธ"), "office/politician": ("Politician Office", "๐Ÿ›๏ธ"), "office/property_management": ("Property Management", "๐Ÿ˜๏ธ"), "office/recruitment": ("Recruitment Agency", "๐Ÿ‘ฅ"), "office/religion": ("Religious Office", "โœ๏ธ"), "office/research": ("Research Office", "๐Ÿ”ฌ"), "office/security": ("Security Company", "๐Ÿ”’"), "office/solicitor": ("Solicitor", "โš–๏ธ"), "office/surveyor": ("Surveyor", "๐Ÿ“"), "office/tax_advisor": ("Tax Advisor", "๐Ÿงพ"), "office/taxi": ("Taxi Office", "๐Ÿš•"), "office/telecommunication": ("Telecoms Office", "๐Ÿ“ก"), "office/therapist": ("Therapist", "๐Ÿง "), "office/travel_agent": ("Travel Agent", "โœˆ๏ธ"), "office/union": ("Trade Union", "โœŠ"), "office/university": ("University Office", "๐ŸŽ“"), "office/vacant": ("Vacant Office", "๐Ÿš๏ธ"), "office/web_design": ("Web Design", "๐ŸŒ"), # public_transport "public_transport/entrance": ("Transport Entrance", "๐Ÿšช"), "public_transport/platform": ("Platform", "๐Ÿš‰"), "public_transport/station": ("Station", "๐Ÿš‰"), "public_transport/stop_position": ("Stop", "๐Ÿš"), # shop "shop/accessories": ("Accessories Shop", "๐Ÿ‘œ"), "shop/agrarian": ("Farm Supply Shop", "๐ŸŒพ"), "shop/alcohol": ("Off-Licence", "๐Ÿท"), "shop/antiques": ("Antiques Shop", "๐Ÿบ"), "shop/appliance": ("Appliance Shop", "๐Ÿ”Œ"), "shop/art": ("Art Shop", "๐ŸŽจ"), "shop/baby_goods": ("Baby Shop", "๐Ÿผ"), "shop/bag": ("Bag Shop", "๐Ÿ‘œ"), "shop/bakery": ("Bakery", "๐Ÿฅ"), "shop/bathroom": ("Bathroom Shop", "๐Ÿ›"), "shop/bathroom_furnishing": ("Bathroom Furnishings", "๐Ÿšฟ"), "shop/beauty": ("Beauty Shop", "๐Ÿ’„"), "shop/bed": ("Bed Shop", "๐Ÿ›๏ธ"), "shop/beverages": ("Drinks Shop", "๐Ÿฅค"), "shop/bicycle": ("Bike Shop", "๐Ÿšฒ"), "shop/boat": ("Boat Shop", "โ›ต"), "shop/bookmaker": ("Bookmaker", "๐Ÿ‡"), "shop/books": ("Bookshop", "๐Ÿ“š"), "shop/boutique": ("Boutique", "๐Ÿ‘—"), "shop/building_materials": ("Building Materials", "๐Ÿงฑ"), "shop/butcher": ("Butcher", "๐Ÿฅฉ"), "shop/camera": ("Camera Shop", "๐Ÿ“ท"), "shop/candles": ("Candle Shop", "๐Ÿ•ฏ๏ธ"), "shop/car": ("Car Dealership", "๐Ÿš—"), "shop/car;car_repair": ("Car Sales & Repair", "๐Ÿš—"), "shop/car_parts": ("Car Parts", "๐Ÿ”ฉ"), "shop/car_repair": ("Car Repair", "๐Ÿ”ง"), "shop/caravan": ("Caravan Dealer", "๐Ÿš"), "shop/carpet": ("Carpet Shop", "๐Ÿงถ"), "shop/catalogue": ("Catalogue Shop", "๐Ÿ“‹"), "shop/charity": ("Charity Shop", "โค๏ธ"), "shop/cheese": ("Cheese Shop", "๐Ÿง€"), "shop/chemist": ("Chemist", "๐Ÿงช"), "shop/chocolate": ("Chocolate Shop", "๐Ÿซ"), "shop/clothes": ("Clothes Shop", "๐Ÿ‘•"), "shop/coffee": ("Coffee Shop", "โ˜•"), "shop/collector": ("Collector Shop", "๐Ÿ†"), "shop/computer": ("Computer Shop", "๐Ÿ–ฅ๏ธ"), "shop/confectionery": ("Sweet Shop", "๐Ÿฌ"), "shop/convenience": ("Convenience Store", "๐Ÿช"), "shop/copyshop": ("Copy Shop", "๐Ÿ–จ๏ธ"), "shop/cosmetics": ("Cosmetics Shop", "๐Ÿ’…"), "shop/country_store": ("Country Store", "๐Ÿก"), "shop/craft": ("Craft Shop", "โœ‚๏ธ"), "shop/curtain": ("Curtain Shop", "๐ŸชŸ"), "shop/dairy": ("Dairy Shop", "๐Ÿฅ›"), "shop/deli": ("Delicatessen", "๐Ÿง†"), "shop/department_store": ("Department Store", "๐Ÿฌ"), "shop/discount": ("Discount Store", "๐Ÿ’ฒ"), "shop/doityourself": ("DIY Store", "๐Ÿ”จ"), "shop/doors": ("Door Shop", "๐Ÿšช"), "shop/dry_cleaning": ("Dry Cleaner", "๐Ÿ‘”"), "shop/e-cigarette": ("Vape Shop", "๐Ÿ’จ"), "shop/electrical": ("Electrical Shop", "โšก"), "shop/electronics": ("Electronics Shop", "๐Ÿ“ฑ"), "shop/erotic": ("Adult Shop", "๐Ÿ”ž"), "shop/esoteric": ("Esoteric Shop", "๐Ÿ”ฎ"), "shop/estate_agent": ("Estate Agent", "๐Ÿ "), "shop/fabric": ("Fabric Shop", "๐Ÿงต"), "shop/fan": ("Fan Shop", "๐Ÿ…"), "shop/farm": ("Farm Shop", "๐Ÿฅ•"), "shop/fashion_accessories": ("Fashion Accessories", "๐Ÿ‘’"), "shop/fireplace": ("Fireplace Shop", "๐Ÿ”ฅ"), "shop/fishing": ("Fishing Shop", "๐ŸŽฃ"), "shop/flooring": ("Flooring Shop", "๐Ÿชต"), "shop/florist": ("Florist", "๐Ÿ’"), "shop/food": ("Food Shop", "๐Ÿž"), "shop/frame": ("Framing Shop", "๐Ÿ–ผ๏ธ"), "shop/frozen_food": ("Frozen Food Shop", "๐ŸงŠ"), "shop/fuel": ("Fuel Shop", "โ›ฝ"), "shop/funeral_directors": ("Funeral Director", "โšฐ๏ธ"), "shop/furniture": ("Furniture Shop", "๐Ÿช‘"), "shop/games": ("Games Shop", "๐ŸŽฎ"), "shop/garden_centre": ("Garden Centre", "๐ŸŒป"), "shop/gas": ("Gas Shop", "๐Ÿ”ฅ"), "shop/general": ("General Store", "๐Ÿช"), "shop/gift": ("Gift Shop", "๐ŸŽ"), "shop/glaziery": ("Glazier", "๐ŸชŸ"), "shop/greengrocer": ("Greengrocer", "๐Ÿฅฌ"), "shop/grocery": ("Grocery Shop", "๐Ÿ›’"), "shop/haberdashery": ("Haberdashery", "๐Ÿงต"), "shop/hairdresser": ("Hairdresser", "๐Ÿ’‡"), "shop/hairdresser_supply": ("Hairdresser Supply", "๐Ÿ’‡"), "shop/hardware": ("Hardware Shop", "๐Ÿ”ฉ"), "shop/health": ("Health Shop", "๐ŸŒฟ"), "shop/health_food": ("Health Food Shop", "๐Ÿฅ—"), "shop/hearing_aids": ("Hearing Aid Shop", "๐Ÿ‘‚"), "shop/herbalist": ("Herbalist", "๐ŸŒฟ"), "shop/hifi": ("Hi-Fi Shop", "๐Ÿ”Š"), "shop/household": ("Household Shop", "๐Ÿ "), "shop/household_linen": ("Linen Shop", "๐Ÿ›๏ธ"), "shop/houseware": ("Houseware Shop", "๐Ÿณ"), "shop/ice_cream": ("Ice Cream Shop", "๐Ÿฆ"), "shop/interior_decoration": ("Interior Decoration", "๐Ÿ–ผ๏ธ"), "shop/jewelry": ("Jewellery Shop", "๐Ÿ’"), "shop/kiosk": ("Kiosk", "๐Ÿช"), "shop/kitchen": ("Kitchen Shop", "๐Ÿณ"), "shop/laundry": ("Laundry", "๐Ÿงบ"), "shop/leather": ("Leather Shop", "๐Ÿงณ"), "shop/lighting": ("Lighting Shop", "๐Ÿ’ก"), "shop/locksmith": ("Locksmith", "๐Ÿ”"), "shop/mall": ("Shopping Centre", "๐Ÿฌ"), "shop/massage": ("Massage Parlour", "๐Ÿ’†"), "shop/medical_supply": ("Medical Supply", "๐Ÿฉบ"), "shop/military_surplus": ("Military Surplus", "๐ŸŽ–๏ธ"), "shop/mobile_phone": ("Mobile Phone Shop", "๐Ÿ“ฑ"), "shop/mobile_phone_accessories": ("Phone Accessories", "๐Ÿ“ฑ"), "shop/mobility": ("Mobility Shop", "โ™ฟ"), "shop/mobility_scooter": ("Mobility Scooter Shop", "๐Ÿฆฝ"), "shop/model": ("Model Shop", "โœˆ๏ธ"), "shop/money_lender": ("Money Lender", "๐Ÿ’ฐ"), "shop/motorcycle": ("Motorcycle Shop", "๐Ÿ๏ธ"), "shop/motorcycle_repair": ("Motorcycle Repair", "๐Ÿ”ง"), "shop/music": ("Music Shop", "๐ŸŽต"), "shop/musical_instrument": ("Musical Instrument Shop", "๐ŸŽธ"), "shop/newsagent": ("Newsagent", "๐Ÿ“ฐ"), "shop/nutrition_supplements": ("Nutrition Shop", "๐Ÿ’ช"), "shop/optician": ("Optician", "๐Ÿ‘“"), "shop/outdoor": ("Outdoor Shop", "๐Ÿ•๏ธ"), "shop/outpost": ("Outpost", "๐Ÿ“ฆ"), "shop/paint": ("Paint Shop", "๐ŸŽจ"), "shop/party": ("Party Shop", "๐ŸŽˆ"), "shop/pastry": ("Pastry Shop", "๐Ÿฅ"), "shop/pawnbroker": ("Pawnbroker", "๐Ÿ’ฐ"), "shop/perfumery": ("Perfumery", "๐ŸŒธ"), "shop/pet": ("Pet Shop", "๐Ÿพ"), "shop/pet_grooming": ("Pet Grooming", "๐Ÿฉ"), "shop/photo": ("Photo Shop", "๐Ÿ“ธ"), "shop/piercing": ("Piercing Studio", "๐Ÿ’Ž"), "shop/plant_hire": ("Plant Hire", "๐Ÿšœ"), "shop/pottery": ("Pottery Shop", "๐Ÿบ"), "shop/printer_ink": ("Ink & Toner Shop", "๐Ÿ–จ๏ธ"), "shop/printing": ("Print Shop", "๐Ÿ–จ๏ธ"), "shop/psychic": ("Psychic", "๐Ÿ”ฎ"), "shop/pyrotechnics": ("Fireworks Shop", "๐ŸŽ†"), "shop/religion": ("Religious Shop", "โœ๏ธ"), "shop/rental": ("Rental Shop", "๐Ÿ”‘"), "shop/repair": ("Repair Shop", "๐Ÿ”ง"), "shop/scuba_diving": ("Scuba Diving Shop", "๐Ÿคฟ"), "shop/seafood": ("Fishmonger", "๐ŸŸ"), "shop/second_hand": ("Second-Hand Shop", "โ™ป๏ธ"), "shop/security": ("Security Shop", "๐Ÿ”’"), "shop/sewing": ("Sewing Shop", "๐Ÿชก"), "shop/shoe_repair": ("Shoe Repair", "๐Ÿ‘ž"), "shop/shoes": ("Shoe Shop", "๐Ÿ‘Ÿ"), "shop/sports": ("Sports Shop", "โšฝ"), "shop/stationery": ("Stationery Shop", "โœ๏ธ"), "shop/storage_rental": ("Self Storage", "๐Ÿ“ฆ"), "shop/supermarket": ("Supermarket", "๐Ÿ›’"), "shop/swimming_pool": ("Pool Supplies", "๐ŸŠ"), "shop/tailor": ("Tailor", "๐Ÿงต"), "shop/tattoo": ("Tattoo Studio", "๐Ÿ–‹๏ธ"), "shop/taxi": ("Taxi Booking", "๐Ÿš•"), "shop/tea": ("Tea Shop", "๐Ÿซ–"), "shop/telecommunication": ("Telecoms Shop", "๐Ÿ“ก"), "shop/ticket": ("Ticket Office", "๐ŸŽซ"), "shop/tiles": ("Tile Shop", "๐Ÿ”ฒ"), "shop/tobacco": ("Tobacconist", "๐Ÿšฌ"), "shop/tool_hire": ("Tool Hire", "๐Ÿงฐ"), "shop/toys": ("Toy Shop", "๐Ÿงธ"), "shop/trade": ("Trade Supplier", "๐Ÿญ"), "shop/travel_agency": ("Travel Agency", "โœˆ๏ธ"), "shop/trophy": ("Trophy Shop", "๐Ÿ†"), "shop/tyres": ("Tyre Shop", "๐Ÿ›ž"), "shop/vacant": ("Vacant Shop", "๐Ÿš๏ธ"), "shop/variety_store": ("Variety Store", "๐Ÿช"), "shop/video": ("Video Shop", "๐Ÿ“€"), "shop/video_games": ("Video Game Shop", "๐ŸŽฎ"), "shop/watches": ("Watch Shop", "โŒš"), "shop/water_sports": ("Water Sports Shop", "๐Ÿ„"), "shop/weapons": ("Weapons Shop", "๐Ÿ—ก๏ธ"), "shop/wedding": ("Wedding Shop", "๐Ÿ’’"), "shop/wholesale": ("Wholesaler", "๐Ÿ“ฆ"), "shop/wigs": ("Wig Shop", "๐Ÿ’‡"), "shop/window_blind": ("Blinds Shop", "๐ŸชŸ"), "shop/windows": ("Window Shop", "๐ŸชŸ"), "shop/wine": ("Wine Shop", "๐Ÿท"), "shop/wool": ("Wool Shop", "๐Ÿงถ"), "shop/yes": ("Shop", "๐Ÿ›๏ธ"), # tourism "tourism/artwork": ("Public Artwork", "๐ŸŽจ"), "tourism/attraction": ("Tourist Attraction", "๐Ÿ“ธ"), "tourism/camp_site": ("Campsite", "โ›บ"), "tourism/caravan_site": ("Caravan Site", "๐Ÿš"), "tourism/chalet": ("Chalet", "๐Ÿ”๏ธ"), "tourism/gallery": ("Gallery", "๐Ÿ–ผ๏ธ"), "tourism/guest_house": ("Guest House", "๐Ÿก"), "tourism/hostel": ("Hostel", "๐Ÿ›๏ธ"), "tourism/hotel": ("Hotel", "๐Ÿจ"), "tourism/motel": ("Motel", "๐Ÿจ"), "tourism/museum": ("Museum", "๐Ÿ›๏ธ"), "tourism/picnic_site": ("Picnic Site", "๐Ÿงบ"), "tourism/preserved_railway": ("Heritage Railway", "๐Ÿš‚"), "tourism/theme_park": ("Theme Park", "๐ŸŽข"), "tourism/viewpoint": ("Viewpoint", "๐Ÿ”ญ"), "tourism/zoo": ("Zoo", "๐Ÿฆ"), } def transform(input_path: Path) -> pl.LazyFrame: lf = pl.scan_parquet(input_path) # Get all unique categories present in the data all_categories = lf.select("category").unique().collect().to_series().to_list() # Verify every non-dropped category has a mapping unmapped = [] for cat in all_categories: if cat not in DROP_CATEGORIES and cat not in CATEGORY_MAP: unmapped.append(cat) if unmapped: raise ValueError(f"Categories missing from CATEGORY_MAP: {sorted(unmapped)}") # Verify every CATEGORY_MAP key actually exists in the data (catch typos) mapped_but_absent = [] all_set = set(all_categories) for cat in CATEGORY_MAP: if cat not in all_set: mapped_but_absent.append(cat) if mapped_but_absent: raise ValueError( f"CATEGORY_MAP contains categories not in data: {sorted(mapped_but_absent)}" ) # Drop unwanted categories lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES))) # Build name and emoji lookup expressions name_mapping = {k: v[0] for k, v in CATEGORY_MAP.items()} emoji_mapping = {k: v[1] for k, v in CATEGORY_MAP.items()} # Check no friendly names are missing (defensive) missing_names = [k for k, v in CATEGORY_MAP.items() if not v[0]] if missing_names: raise ValueError(f"Empty friendly names for: {missing_names}") missing_emojis = [k for k, v in CATEGORY_MAP.items() if not v[1]] if missing_emojis: raise ValueError(f"Empty emojis for: {missing_emojis}") lf = lf.with_columns( pl.col("category").replace_strict(name_mapping).alias("category"), pl.col("category").replace_strict(emoji_mapping).alias("emoji"), ) return lf def main(): parser = argparse.ArgumentParser( description="Transform raw POIs to filtered version with friendly names" ) parser.add_argument( "--input", type=Path, required=True, help="Raw POIs parquet file" ) parser.add_argument( "--output", type=Path, required=True, help="Output filtered POIs parquet file" ) args = parser.parse_args() df = transform(args.input).collect() df.write_parquet(args.output) size_mb = args.output.stat().st_size / (1024 * 1024) print(f"Wrote {args.output} ({size_mb:.1f} MB, {len(df):,} POIs)") print(f"\nCategories ({df['category'].n_unique()}):") counts = df.group_by("category", "emoji").len().sort("len", descending=True) for row in counts.iter_rows(named=True): print(f" {row['emoji']} {row['category']}: {row['len']:,}") if __name__ == "__main__": main()