England only

This commit is contained in:
Andras Schmelczer 2026-03-15 14:03:38 +00:00
parent 4d08f5d08d
commit 02712f41e8
8 changed files with 294 additions and 60 deletions

View file

@ -0,0 +1,45 @@
"""Download England country boundary GeoJSON from ONS Open Geography Portal.
Source: ONS Countries (December 2024) Boundaries UK BGC (Generalised Clipped)
Licence: OGL v3
"""
import argparse
from pathlib import Path
import httpx
# ArcGIS REST API — query for England only, generalised (BGC) resolution.
# The query string decodes to: where=CTRY24NM='England' (select the single
# country row by name), outFields=CTRY24NM (return only the name attribute),
# f=geojson (ask for a GeoJSON response).
URL = (
"https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
"Countries_December_2024_Boundaries_UK_BGC/FeatureServer/0/query"
"?where=CTRY24NM%3D%27England%27&outFields=CTRY24NM&f=geojson"
)
def main() -> None:
    """Download the England boundary from ONS and save it as GeoJSON.

    Parses ``--output`` from the command line, fetches ``URL``, checks that
    the response contains exactly one feature, and writes the response body
    to the output path (creating parent directories as needed).

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        ValueError: If the body is not a JSON object, carries an ``error``
            member, or does not contain exactly one feature.
    """
    parser = argparse.ArgumentParser(
        description="Download England country boundary GeoJSON"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output GeoJSON file path"
    )
    args = parser.parse_args()
    args.output.parent.mkdir(parents=True, exist_ok=True)

    print("Downloading England boundary from ONS...")
    response = httpx.get(URL, follow_redirects=True, timeout=60)
    response.raise_for_status()

    data = response.json()
    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a GeoJSON object from ONS, got {type(data).__name__}"
        )
    # NOTE(review): ArcGIS REST services may report a failed query as a JSON
    # "error" member rather than through the HTTP status; surface it so the
    # real cause is not hidden behind a feature-count mismatch.
    if "error" in data:
        raise ValueError(f"ONS boundary query failed: {data['error']}")

    # Treat a missing or null "features" member the same as an empty list.
    features = data.get("features") or []
    if len(features) != 1:
        raise ValueError(f"Expected 1 feature for England, got {len(features)}")

    # Write UTF-8 explicitly: without it write_text() uses the locale's
    # default encoding, which can fail or corrupt non-ASCII characters.
    args.output.write_text(response.text, encoding="utf-8")
    size_kb = args.output.stat().st_size / 1024
    print(f"Saved to {args.output} ({size_kb:.0f} KB)")


if __name__ == "__main__":
    main()

View file

@ -7,6 +7,7 @@ from pathlib import Path
from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
SPRITES_BASE = "https://protomaps.github.io/basemaps-assets/sprites/v4"
TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
# Font stacks used by @protomaps/basemaps with lang='en'
@ -77,6 +78,15 @@ def main():
url = f"{GLYPHS_BASE}/{font_encoded}/{name}"
tasks.append((url, font_dir / name))
# Sprite sheets (light/dark, 1x and 2x)
sprites_dir = out / "sprites"
for theme in ("light", "dark"):
for suffix in ("json", "png"):
url = f"{SPRITES_BASE}/{theme}.{suffix}"
tasks.append((url, sprites_dir / f"{theme}.{suffix}"))
url_2x = f"{SPRITES_BASE}/{theme}@2x.{suffix}"
tasks.append((url_2x, sprites_dir / f"{theme}@2x.{suffix}"))
# Twemoji PNGs
twemoji_dir = out / "twemoji"
for code in twemoji_codes:

View file

@ -2,7 +2,7 @@
Extracts named place nodes (cities, towns, suburbs, etc.) and railway stations
(tube, national rail, DLR, etc.) for typeahead search.
Reuses the same great-britain-latest.osm.pbf as pois.py.
Reuses the same england-latest.osm.pbf as pois.py.
"""
import argparse
@ -10,9 +10,16 @@ from pathlib import Path
import osmium
import polars as pl
from shapely.geometry import Point
from tqdm import tqdm
from .pois import UK_BBOX_EAST, UK_BBOX_NORTH, UK_BBOX_SOUTH, UK_BBOX_WEST
from pipeline.download.pois import (
ENGLAND_BBOX_EAST,
ENGLAND_BBOX_NORTH,
ENGLAND_BBOX_SOUTH,
ENGLAND_BBOX_WEST,
)
from pipeline.utils.england_geometry import load_england_polygon
PLACE_TYPES = {"city"}
@ -57,10 +64,11 @@ def _station_display_name(name: str, tags: dict[str, str]) -> str:
class PlaceHandler(osmium.SimpleHandler):
def __init__(self, progress: tqdm) -> None:
def __init__(self, progress: tqdm, england_polygon) -> None:
super().__init__()
self._progress = progress
self.places: list[dict] = []
self._england = england_polygon
def _add(
self, name: str, place_type: str, lat: float, lon: float, population: int
@ -82,10 +90,12 @@ class PlaceHandler(osmium.SimpleHandler):
return
lat, lon = n.location.lat, n.location.lon
if not (
UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
ENGLAND_BBOX_SOUTH <= lat <= ENGLAND_BBOX_NORTH
and ENGLAND_BBOX_WEST <= lon <= ENGLAND_BBOX_EAST
):
return
if not self._england.contains(Point(lon, lat)):
return
name = n.tags.get("name:en", n.tags.get("name", ""))
if not name:
@ -124,9 +134,17 @@ def main() -> None:
parser.add_argument(
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument(
"--boundary",
type=Path,
required=True,
help="England boundary GeoJSON file",
)
args = parser.parse_args()
pbf_file = args.pbf
england_polygon = load_england_polygon(args.boundary)
print("Extracting place nodes: cities + railway stations")
with tqdm(
unit=" elements",
@ -135,7 +153,7 @@ def main() -> None:
smoothing=0.05,
mininterval=1.0,
) as progress:
handler = PlaceHandler(progress)
handler = PlaceHandler(progress, england_polygon)
handler.apply_file(str(pbf_file), locations=True)
print(f"Extracted {len(handler.places):,} place nodes")

View file

@ -4,17 +4,20 @@ from tempfile import mkdtemp
import osmium
import polars as pl
from shapely.geometry import Point
from tqdm import tqdm
from pipeline.utils.england_geometry import load_england_polygon
BATCH_SIZE = 50_000
MIN_OCCURENCE_COUNT = 20
UK_BBOX_WEST = -7.57
UK_BBOX_SOUTH = 49.96
UK_BBOX_EAST = 1.68
UK_BBOX_NORTH = 58.64
# Bounding box for fast pre-filtering before the precise polygon check
ENGLAND_BBOX_WEST = -6.45
ENGLAND_BBOX_SOUTH = 49.85
ENGLAND_BBOX_EAST = 1.77
ENGLAND_BBOX_NORTH = 55.82
POI_TAG_KEYS: list[str] = [
"amenity",
@ -31,19 +34,23 @@ POI_TAG_KEYS: list[str] = [
class POIHandler(osmium.SimpleHandler):
def __init__(self, progress: tqdm, tmp_dir: Path) -> None:
def __init__(self, progress: tqdm, tmp_dir: Path, england_polygon) -> None:
super().__init__()
self._batch: list[dict] = []
self._tmp_dir = tmp_dir
self._batch_num = 0
self.poi_count = 0
self._progress = progress
self._england = england_polygon
def _in_uk(self, lat: float, lon: float) -> bool:
return (
UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
)
def _in_england(self, lat: float, lon: float) -> bool:
    """Return whether the coordinate lies inside the England polygon.

    The cheap bounding-box comparison runs first so that the polygon
    containment test is only evaluated for points inside the box.
    """
    inside_bbox = (
        ENGLAND_BBOX_SOUTH <= lat <= ENGLAND_BBOX_NORTH
        and ENGLAND_BBOX_WEST <= lon <= ENGLAND_BBOX_EAST
    )
    # Short-circuit: the polygon check is skipped when the bbox test fails.
    # Point takes (x, y), i.e. (lon, lat).
    return inside_bbox and self._england.contains(Point(lon, lat))
def _match_tags(self, tags: osmium.osm.TagList) -> list[str]:
return [f"{key}/{tags[key]}" for key in POI_TAG_KEYS if key in tags]
@ -90,7 +97,7 @@ class POIHandler(osmium.SimpleHandler):
if not n.location.valid:
return
lat, lon = n.location.lat, n.location.lon
if not self._in_uk(lat, lon):
if not self._in_england(lat, lon):
return
categories = self._match_tags(n.tags)
for category in categories:
@ -107,11 +114,19 @@ def main() -> None:
parser.add_argument(
"--pbf", type=Path, required=True, help="Path to OSM PBF file"
)
parser.add_argument(
"--boundary",
type=Path,
required=True,
help="England boundary GeoJSON file",
)
args = parser.parse_args()
pbf_file = args.pbf
print(f"Tag keys: {POI_TAG_KEYS}")
england_polygon = load_england_polygon(args.boundary)
tmp_dir = Path(mkdtemp(prefix="pois_"))
with tqdm(
unit=" elements",
@ -120,7 +135,7 @@ def main() -> None:
smoothing=0.05,
mininterval=1.0,
) as progress:
handler = POIHandler(progress, tmp_dir)
handler = POIHandler(progress, tmp_dir, england_polygon)
handler.apply_file(str(pbf_file), locations=True)
handler._flush_batch() # write any remaining POIs