147 lines
5.6 KiB
Python
147 lines
5.6 KiB
Python
"""Configuration for POI extraction from OpenStreetMap."""
|
|
|
|
from pathlib import Path
|
|
|
|
# File paths
# DATA_DIR is located relative to this file: the "data_sources" directory
# that sits two levels above this file's own directory.
DATA_DIR: Path = Path(__file__).parent.parent.parent / "data_sources"
# Great Britain PBF extract inside DATA_DIR.
GB_PBF_FILE: Path = DATA_DIR / "great-britain-latest.osm.pbf"
# Parquet file inside DATA_DIR (named as the output of the extraction).
OUTPUT_FILE: Path = DATA_DIR / "uk_pois.parquet"
|
|
|
|
# Geofabrik download URL for Great Britain (~1.1GB PBF, much faster than planet XML)
GEOFABRIK_GB_URL: str = (
    "https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
)
|
|
|
|
# UK bounding box (west, south, east, north) — used for way centroid filtering
# Values are longitude (west/east) and latitude (south/north) in decimal degrees.
# NOTE(review): a northern edge of 58.64 lies south of Orkney and Shetland
# (roughly 59–60.9 degrees north) — confirm that excluding them is intended.
UK_BBOX_WEST: float = -7.57
UK_BBOX_SOUTH: float = 49.96
UK_BBOX_EAST: float = 1.68
UK_BBOX_NORTH: float = 58.64
|
|
|
|
# OSM tag mapping to categories
# Maps (tag_key, tag_value) -> category name
# Several tags may map to the same category (e.g. both railway=station and
# railway=halt become "train_station").
# NOTE(review): one OSM element can carry more than one mapped tag (e.g.
# railway=station together with station=subway); this table does not define
# which category wins — verify the precedence in the consumer.
# NOTE(review): public_transport=stop_position is mapped to "bus_stop" here,
# but the tag is presumably not bus-specific in OSM — confirm this is intended.
OSM_TAG_MAPPING: dict[tuple[str, str], str] = {
    # Education
    ("amenity", "school"): "school",
    ("amenity", "kindergarten"): "preschool",
    ("amenity", "college"): "college_university",
    ("amenity", "university"): "college_university",
    ("amenity", "library"): "library",
    ("amenity", "language_school"): "school",
    ("amenity", "music_school"): "school",
    ("amenity", "driving_school"): "school",
    # Healthcare
    ("amenity", "hospital"): "hospital",
    ("amenity", "clinic"): "public_health_clinic",
    ("amenity", "doctors"): "doctor",
    ("amenity", "dentist"): "dentist",
    ("amenity", "pharmacy"): "pharmacy",
    ("amenity", "veterinary"): "veterinary",
    ("amenity", "nursing_home"): "nursing_home",
    ("amenity", "social_facility"): "social_facility",
    # Transport
    ("railway", "station"): "train_station",
    ("railway", "halt"): "train_station",
    ("railway", "tram_stop"): "tram_stop",
    ("amenity", "bus_station"): "bus_station",
    ("amenity", "ferry_terminal"): "ferry_terminal",
    ("public_transport", "station"): "train_station",
    ("public_transport", "stop_position"): "bus_stop",
    ("station", "subway"): "metro_station",
    ("station", "light_rail"): "light_rail_station",
    ("aeroway", "aerodrome"): "airport",
    ("highway", "bus_stop"): "bus_stop",
    # Parks & Leisure
    ("leisure", "park"): "park",
    ("leisure", "nature_reserve"): "nature_reserve",
    ("leisure", "dog_park"): "dog_park",
    ("leisure", "playground"): "playground",
    ("leisure", "sports_centre"): "sports_centre",
    ("leisure", "swimming_pool"): "swimming_pool",
    ("leisure", "fitness_centre"): "gym",
    ("leisure", "golf_course"): "golf_course",
    ("leisure", "garden"): "garden",
    ("leisure", "marina"): "marina",
    ("boundary", "national_park"): "national_park",
    # Emergency
    ("amenity", "police"): "police_department",
    ("amenity", "fire_station"): "fire_department",
    # Shopping
    ("shop", "supermarket"): "supermarket",
    ("shop", "convenience"): "convenience_store",
    ("shop", "grocery"): "grocery_store",
    ("shop", "bakery"): "bakery",
    ("shop", "butcher"): "butcher",
    ("shop", "greengrocer"): "greengrocer",
    ("shop", "deli"): "deli",
    ("shop", "department_store"): "department_store",
    ("shop", "clothes"): "clothing_store",
    ("shop", "shoes"): "shoe_store",
    ("shop", "electronics"): "electronics_store",
    ("shop", "hardware"): "hardware_store",
    ("shop", "furniture"): "furniture_store",
    ("shop", "car"): "car_dealer",
    ("shop", "car_repair"): "car_repair",
    ("shop", "hairdresser"): "hairdresser",
    ("shop", "beauty"): "beauty_salon",
    ("shop", "optician"): "optician",
    ("shop", "newsagent"): "newsagent",
    ("shop", "books"): "bookshop",
    ("shop", "charity"): "charity_shop",
    ("shop", "alcohol"): "off_licence",
    ("shop", "laundry"): "laundry",
    ("shop", "dry_cleaning"): "dry_cleaning",
    ("shop", "mall"): "shopping_centre",
    # Food & Drink
    ("amenity", "restaurant"): "restaurant",
    ("amenity", "cafe"): "cafe",
    ("amenity", "pub"): "pub",
    ("amenity", "bar"): "bar",
    ("amenity", "fast_food"): "fast_food",
    ("amenity", "food_court"): "food_court",
    ("amenity", "ice_cream"): "ice_cream",
    ("amenity", "biergarten"): "beer_garden",
    # Finance
    ("amenity", "bank"): "bank",
    ("amenity", "atm"): "atm",
    ("amenity", "bureau_de_change"): "bureau_de_change",
    # Entertainment & Culture
    ("amenity", "cinema"): "cinema",
    ("amenity", "theatre"): "theatre",
    ("amenity", "nightclub"): "nightclub",
    ("amenity", "community_centre"): "community_centre",
    ("amenity", "arts_centre"): "arts_centre",
    ("tourism", "museum"): "museum",
    ("tourism", "gallery"): "gallery",
    ("tourism", "attraction"): "attraction",
    ("tourism", "zoo"): "zoo",
    ("tourism", "theme_park"): "theme_park",
    ("tourism", "viewpoint"): "viewpoint",
    # Accommodation
    ("tourism", "hotel"): "hotel",
    ("tourism", "hostel"): "hostel",
    ("tourism", "guest_house"): "guest_house",
    ("tourism", "camp_site"): "campsite",
    ("tourism", "caravan_site"): "caravan_site",
    # Religion
    ("amenity", "place_of_worship"): "place_of_worship",
    # Government & Public
    ("amenity", "townhall"): "town_hall",
    ("amenity", "courthouse"): "courthouse",
    ("amenity", "post_office"): "post_office",
    ("amenity", "prison"): "prison",
    ("amenity", "recycling"): "recycling",
    ("amenity", "waste_disposal"): "waste_disposal",
    ("amenity", "toilets"): "public_toilets",
    # Fuel
    ("amenity", "fuel"): "petrol_station",
    ("amenity", "charging_station"): "ev_charging",
    # Parking
    ("amenity", "parking"): "parking",
    ("amenity", "bicycle_parking"): "bicycle_parking",
}
|
|
|
|
# Build reverse lookup: tag_key -> set of tag_values we care about
# Only the (tag_key, tag_value) keys of OSM_TAG_MAPPING are needed, so the
# dict is iterated directly instead of through .items().
# The loop variables are underscore-prefixed so they are not picked up by
# `from <module> import *` after the loop finishes.
TAG_KEYS_TO_CHECK: dict[str, set[str]] = {}
for _tag_key, _tag_value in OSM_TAG_MAPPING:
    TAG_KEYS_TO_CHECK.setdefault(_tag_key, set()).add(_tag_value)
|