# perfect-postcode/pipeline/download/map_assets.py
#
# Repository-viewer metadata captured alongside this file (not part of the
# program itself):
#   Last commit: be02fc16bb by Andras Schmelczer, "Test changes",
#   2026-05-09 11:35:38 +01:00.
#   Checks at that commit: "Build and publish Docker image / build-and-push
#   (push)" failed after 8m20s; "CI / Check (push)" failed after 10m40s.
#   Size: 419 lines, 14 KiB, Python.
#   Viewer warning: this file contains Unicode characters that might be
#   confused with other characters. If that is intentional, the warning can
#   safely be ignored.


import argparse
import base64
import json
import re
import sys
import urllib.error
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO
from pathlib import Path

from PIL import Image, ImageDraw

from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
# Upstream base URLs. main() appends a relative file name to each of these to
# build the download URL for glyph ranges, sprite sheets, twemoji PNGs and
# branded POI icons respectively.
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
SPRITES_BASE = "https://protomaps.github.io/basemaps-assets/sprites/v4"
TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
POI_ICON_BASE = "https://geolytix.github.io/MapIcons"
# Font stacks used by @protomaps/basemaps with lang='en'
FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
# Icon files downloaded as-is from POI_ICON_BASE. Each entry is used both as
# the URL suffix and as the relative path below the "poi-icons" output
# directory.
POI_ICON_PATHS = [
    "brands_2023/supermarkets/farmfoods.svg",
    "brands_2023/supermarkets/heron_foods.svg",
    "brands_2023/supermarkets/little_waitrose.svg",
    "brands_2024/amazon_fresh.svg",
    "brands_2024/booths.svg",
    "brands_2024/budgens.svg",
    "brands_2024/cook.svg",
    "brands_2024/dunnes_stores.svg",
    "brands_2024/iceland.svg",
    "brands_2024/makro.svg",
    "brands_2024/mns.svg",
    "brands_2024/morrisons_daily.svg",
    "brands_2024/sainsburys_local.svg",
    "brands_2024/wholefoods.svg",
    "logos/aldi.svg",
    "logos/asda.svg",
    "logos/centra.svg",
    "logos/coop.svg",
    "logos/lidl.svg",
    "logos/morrisons.svg",
    "logos/planet_organic.svg",
    "logos/sainsburys.svg",
    "logos/spar.svg",
    "logos/tesco.svg",
    "logos/tesco_express.svg",
    "logos/tesco_extra.svg",
    "logos/waitrose.svg",
    "public_transport/london_tube.svg",
    "visuals/mns.svg",
]
# Icons that are built from an upstream marker SVG instead of being copied
# verbatim. Each entry is (kind, source path below POI_ICON_BASE, destination
# path below the "poi-icons" output directory); `kind` selects the conversion
# performed by download_derived_poi_icon().
DERIVED_POI_ICON_PATHS = [
    ("costco_logo", "brands/costco.svg", "logos/costco.svg"),
    (
        "embedded_png",
        "brands/iceland_food_warehouse_24px.svg",
        "logos/the_food_warehouse.png",
    ),
]
# Per-icon crop rectangles, keyed by the icon's path below the "poi-icons"
# directory. Each value is (x, y, width, height) and is written into the SVG
# as its viewBox by set_svg_geometry(). "logos/costco.svg" refers to the
# derived Costco logo rather than to a file listed in POI_ICON_PATHS.
POI_ICON_SVG_CROPS = {
    "brands_2023/supermarkets/farmfoods.svg": (1.293, 7.314, 15.48, 3.293),
    "brands_2023/supermarkets/heron_foods.svg": (0.062, 6.68, 17.995, 5.325),
    "brands_2023/supermarkets/little_waitrose.svg": (0.916, 5.645, 16.365, 6.719),
    "brands_2024/amazon_fresh.svg": (3.817, 1.646, 16.367, 16.358),
    "brands_2024/booths.svg": (1.456, 7.143, 15.313, 3.512),
    "brands_2024/budgens.svg": (2.251, 2.278, 13.6, 13.612),
    "brands_2024/cook.svg": (5.028, 5.493, 13.945, 9.648),
    "brands_2024/dunnes_stores.svg": (4.375, 7.732, 15.249, 5.055),
    "brands_2024/iceland.svg": (1.136, 6.823, 16.067, 4.302),
    "brands_2024/makro.svg": (4.411, 6.098, 16.397, 5.428),
    "brands_2024/mns.svg": (4.042, 6.986, 16.171, 6.724),
    "brands_2024/morrisons_daily.svg": (3.341, 4.414, 17.317, 8.248),
    "brands_2024/sainsburys_local.svg": (4.58, 1.61, 14.84, 14.849),
    "brands_2024/wholefoods.svg": (4.17, 2.193, 15.659, 15.668),
    "logos/aldi.svg": (4.813, 2.563, 14.374, 14.383),
    "logos/asda.svg": (3.91, 7.135, 16.181, 5.442),
    "logos/centra.svg": (3.36, 7.35, 17.28, 4.651),
    "logos/coop.svg": (6.407, 4.658, 11.187, 11.793),
    "logos/costco.svg": (70.61, 144.908, 256.67, 85.825),
    "logos/lidl.svg": (4.938, 2.973, 13.985, 13.985),
    "logos/morrisons.svg": (5.231, 2.985, 13.538, 13.398),
    "logos/planet_organic.svg": (5.528, 3.564, 12.943, 12.943),
    "logos/sainsburys.svg": (7.502, 3.572, 8.996, 12.646),
    "logos/spar.svg": (4.933, 2.968, 14.133, 13.853),
    "logos/tesco.svg": (4.338, 6.865, 15.324, 5.359),
    "logos/tesco_express.svg": (5.231, 5.933, 13.538, 8.345),
    "logos/tesco_extra.svg": (4.933, 5.775, 14.133, 8.519),
    "logos/waitrose.svg": (5.528, 6.09, 12.943, 9.855),
}
# Value given to the longer of an SVG's width/height attributes; the shorter
# one is scaled to keep the aspect ratio (see svg_intrinsic_size()).
POI_ICON_SVG_INTRINSIC_MAX = 512
def collect_twemoji_codes() -> list[str]:
    """Return the twemoji file codes needed for every POI emoji.

    The emojis come from the category table and the NaPTAN emoji mapping in
    transform_poi. Each one is reduced to the lowercase hex of its first
    code point, mirroring the frontend's emojiToTwemojiUrl(), which does
    emoji.codePointAt(0).toString(16). The result is de-duplicated and sorted
    as strings.
    """
    category_emojis = {emoji for _group, _name, emoji, _osm_keys in _CATEGORIES}
    all_emojis = category_emojis.union(NAPTAN_EMOJIS.values())
    # Only the first code point matters, so multi-code-point emojis collapse
    # onto the code of their leading character.
    first_code_points = {ord(emoji[0]) for emoji in all_emojis}
    return sorted(f"{code_point:x}" for code_point in first_code_points)
def download_file(url: str, dest: Path, timeout: float = 60.0) -> tuple[bool, str]:
    """Download *url* to *dest*, creating parent directories as needed.

    The body is streamed into a sibling ``<name>.part`` file and only renamed
    onto *dest* once the transfer has completed, so a failed or truncated
    download never leaves a broken asset behind (and never clobbers a good
    copy from an earlier run).

    Args:
        url: Location to fetch.
        dest: Final path of the downloaded file.
        timeout: Seconds to wait on a blocking network operation before
            giving up, so one stalled connection cannot hang a worker.

    Returns:
        ``(success, url)``. Failures are reported on stderr rather than
        raised, because callers tally them per asset.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(f"{dest.name}.part")
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            expected = response.headers.get("Content-Length")
            received = 0
            with partial.open("wb") as sink:
                while chunk := response.read(64 * 1024):
                    sink.write(chunk)
                    received += len(chunk)
        # Same completeness check urlretrieve() performs: a connection that
        # closes early must count as a failure, not as a short file.
        if expected is not None and received < int(expected):
            raise OSError(
                f"retrieval incomplete: got only {received} out of {expected} bytes"
            )
        partial.replace(dest)
        return True, url
    except urllib.error.HTTPError as e:
        print(f" {e.code} {url}", file=sys.stderr)
        return False, url
    except Exception as e:
        # Best-effort boundary: any other failure is logged and counted.
        print(f" ERROR {url}: {e}", file=sys.stderr)
        return False, url
    finally:
        # No-op after a successful rename; removes the leftover otherwise.
        partial.unlink(missing_ok=True)
def download_text(url: str, timeout: float = 60.0) -> str:
    """Fetch *url* and return its body decoded as UTF-8.

    Args:
        url: Location to fetch.
        timeout: Seconds to wait on a blocking network operation; without it
            a stalled server would block the caller indefinitely.

    Raises:
        urllib.error.URLError: If the request fails (HTTPError for HTTP
            status errors).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")
def build_costco_logo(marker_svg: str) -> str:
    """Wrap the logo group of the Costco marker SVG in a standalone SVG.

    The logo is located by two literal anchors: the opening of the group
    whose path starts at ``M 316.312`` and the last ``</g></g></svg>`` in the
    document. Everything from the first anchor through the first ``</g>`` of
    the second is copied into a new SVG with a fixed viewBox and size.

    Raises:
        ValueError: If either anchor is missing from *marker_svg*.
    """
    group_start = marker_svg.find('<g><path d=" M 316.312')
    tail_start = marker_svg.rfind("</g></g></svg>")
    if min(group_start, tail_start) < 0:
        raise ValueError("Costco marker SVG layout changed")

    # Keep the first "</g>" (4 characters) of the tail so the group is closed.
    logo_group = marker_svg[group_start : tail_start + 4]
    pieces = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<svg xmlns="http://www.w3.org/2000/svg" viewBox="70 145 260 90" ',
        'width="260pt" height="90pt" preserveAspectRatio="xMidYMid meet">\n',
        f"{logo_group}\n",
        "</svg>\n",
    ]
    return "".join(pieces)
def trim_white_png(png_bytes: bytes) -> bytes:
    """Make near-white pixels transparent and crop to the visible content.

    A pixel counts as near-white when its red, green and blue channels are
    all above 245; its colour is kept and only its alpha is set to 0. The
    image is then cropped to the bounding box of the non-zero alpha values
    (left uncropped if nothing is visible) and re-encoded as PNG.
    """
    near_white = 245
    rgba = Image.open(BytesIO(png_bytes)).convert("RGBA")
    access = rgba.load()
    for row in range(rgba.height):
        for column in range(rgba.width):
            r, g, b, _alpha = access[column, row]
            if min(r, g, b) > near_white:
                access[column, row] = (r, g, b, 0)

    visible_box = rgba.getchannel("A").getbbox()
    trimmed = rgba.crop(visible_box) if visible_box else rgba

    buffer = BytesIO()
    trimmed.save(buffer, format="PNG")
    return buffer.getvalue()
def extract_embedded_png(marker_svg: str) -> bytes:
    """Return the first base64-embedded image of *marker_svg* as a trimmed PNG.

    The payload is everything after the first ``base64,`` up to the next
    quote character; it is decoded and passed through trim_white_png().

    Raises:
        ValueError: If the SVG contains no base64 payload.
    """
    found = re.search(r"base64,([^\"']+)", marker_svg)
    if found is None:
        raise ValueError("POI marker SVG did not contain an embedded PNG")
    encoded_png = found.group(1)
    raw_png = base64.b64decode(encoded_png)
    return trim_white_png(raw_png)
def svg_intrinsic_size(width: float, height: float) -> tuple[int, int]:
    """Scale a width/height pair so its longer side is the intrinsic maximum.

    The longer side becomes POI_ICON_SVG_INTRINSIC_MAX and the shorter one is
    scaled proportionally, rounded, and never smaller than 1. A non-positive
    width or height yields a square of the maximum size.
    """
    limit = POI_ICON_SVG_INTRINSIC_MAX
    if width <= 0 or height <= 0:
        return (limit, limit)

    is_landscape = width >= height
    if is_landscape:
        short_side, long_side = height, width
    else:
        short_side, long_side = width, height
    scaled_short = max(1, round(limit * short_side / long_side))
    return (limit, scaled_short) if is_landscape else (scaled_short, limit)
def set_svg_geometry(svg_text: str, crop: tuple[float, float, float, float]) -> str:
    """Set viewBox, width and height on the root ``<svg>`` element.

    Only the first ``<svg ...>`` opening tag is edited, and attribute names
    are matched as whole names. Searching the whole document for
    ``width="..."`` would instead hit attributes such as ``stroke-width`` or a
    child element's ``width`` whenever the root tag lacks that attribute,
    corrupting the drawing and leaving the root without a size.

    Args:
        svg_text: SVG document source.
        crop: ``(x, y, width, height)`` to use as the new viewBox. Width and
            height also determine the intrinsic size via svg_intrinsic_size().

    Returns:
        The document with the three root attributes replaced, or inserted
        right after ``<svg`` when absent. Text without an ``<svg`` tag is
        returned unchanged.
    """
    x, y, width, height = crop
    intrinsic_width, intrinsic_height = svg_intrinsic_size(width, height)

    root = re.search(r"<svg\b[^>]*>", svg_text)
    if root is None:
        return svg_text

    tag = root.group(0)
    for name, value in (
        ("viewBox", f"{x:g} {y:g} {width:g} {height:g}"),
        ("width", str(intrinsic_width)),
        ("height", str(intrinsic_height)),
    ):
        attribute = f'{name}="{value}"'
        # The lookbehind rejects longer names that merely end in `name`
        # (stroke-width, inkscape:window-height, ...).
        pattern = rf"""(?<![\w:.-]){name}\s*=\s*(?:"[^"]*"|'[^']*')"""
        tag, replaced = re.subn(pattern, lambda _match, a=attribute: a, tag, count=1)
        if not replaced:
            # tag always starts with "<svg"; splice the attribute in after it.
            tag = f"<svg {attribute}{tag[4:]}"

    return svg_text[: root.start()] + tag + svg_text[root.end() :]
def get_svg_view_box(svg_text: str) -> tuple[float, float, float, float] | None:
match = re.search(r'viewBox="([^"]+)"', svg_text)
if not match:
return None
parts = [
float(part) for part in re.split(r"[\s,]+", match.group(1).strip()) if part
]
if len(parts) != 4:
return None
return (parts[0], parts[1], parts[2], parts[3])
def crop_poi_svg_icons(poi_icons_dir: Path) -> None:
    """Rewrite the geometry of the downloaded POI SVG icons in place.

    First pass: every icon listed in POI_ICON_SVG_CROPS that exists on disk
    gets its configured crop as viewBox. Second pass: every ``*.svg`` below
    *poi_icons_dir* that has a parseable viewBox is rewritten with that same
    viewBox, which gives it the standard intrinsic width/height as well.
    """
    dunnes_icon = "brands_2024/dunnes_stores.svg"
    for relative_path, crop_box in POI_ICON_SVG_CROPS.items():
        svg_file = poi_icons_dir / relative_path
        if svg_file.exists():
            markup = svg_file.read_text(encoding="utf-8")
            if relative_path == dunnes_icon:
                # NOTE(review): presumably the near-white fills are darkened
                # so the logo stays visible on light backgrounds; confirm.
                for pale_fill in ('fill="#fffcfc"', 'fill="#fcfcfc"'):
                    markup = markup.replace(pale_fill, 'fill="#111111"')
            svg_file.write_text(set_svg_geometry(markup, crop_box), encoding="utf-8")

    for svg_file in poi_icons_dir.rglob("*.svg"):
        markup = svg_file.read_text(encoding="utf-8")
        current_box = get_svg_view_box(markup)
        if not current_box:
            continue
        svg_file.write_text(set_svg_geometry(markup, current_box), encoding="utf-8")
def download_derived_poi_icon(
    kind: str, source_path: str, dest: Path
) -> tuple[bool, str]:
    """Fetch a marker SVG and write the icon derived from it to *dest*.

    *kind* selects the conversion: ``"costco_logo"`` writes a standalone SVG
    and ``"embedded_png"`` writes the PNG embedded in the marker. Any failure
    is printed to stderr instead of being raised, including an unknown *kind*
    (which is only detected after the download).

    Returns:
        ``(success, url)`` where *url* is the marker's source URL.
    """
    url = f"{POI_ICON_BASE}/{source_path}"
    dest.parent.mkdir(parents=True, exist_ok=True)
    try:
        marker_svg = download_text(url)
        if kind == "embedded_png":
            dest.write_bytes(extract_embedded_png(marker_svg))
        elif kind == "costco_logo":
            dest.write_text(build_costco_logo(marker_svg), encoding="utf-8")
        else:
            raise ValueError(f"Unknown derived POI icon kind: {kind}")
    except urllib.error.HTTPError as http_error:
        print(f" {http_error.code} {url}", file=sys.stderr)
    except Exception as error:
        print(f" ERROR {url}: {error}", file=sys.stderr)
    else:
        return True, url
    return False, url
# Slategray accent used by civic POI icons (school, library, building, …) in
# protomaps' v4 sprite. We match it so the townhall blends in with its peers.
# Values are (red, green, blue) keyed by sprite theme; full opacity is added
# when the glyph is drawn.
_TOWNHALL_COLOR = {
    "light": (135, 128, 171),
    "dark": (118, 118, 127),
}
# Side length of the square townhall glyph in 1x sprite pixels, and the size
# of the coordinate grid on which the glyph's shapes are specified.
_TOWNHALL_LOGICAL_SIZE = 17
def _render_townhall_glyph(size_px: int, color: tuple[int, int, int]) -> Image.Image:
    """Draw the townhall glyph as a ``size_px`` square RGBA image.

    The shapes are specified on a 17-unit grid: a triangular pediment, a
    horizontal beam, three columns and a base, all filled with *color* at
    full opacity on a transparent background.
    """
    # Draw at 8× resolution and downsample with Lanczos so the pediment's
    # diagonals come out anti-aliased; PIL's polygon fill is otherwise aliased.
    oversample = 8
    side = size_px * oversample
    glyph = Image.new("RGBA", (side, side), (0, 0, 0, 0))
    pen = ImageDraw.Draw(glyph)
    opaque = (*color, 255)

    def to_px(units: float) -> float:
        # Map a grid coordinate onto the oversampled canvas.
        return units * side / _TOWNHALL_LOGICAL_SIZE

    def bar(left: float, top: float, right: float, bottom: float) -> None:
        pen.rectangle(
            [(to_px(left), to_px(top)), (to_px(right), to_px(bottom))], fill=opaque
        )

    pediment = [(8.5, 1), (15, 6.5), (2, 6.5)]
    pen.polygon([(to_px(px), to_px(py)) for px, py in pediment], fill=opaque)
    bar(1, 6.5, 16, 8.5)
    for column_left in (3, 8, 13):
        bar(column_left, 8.5, column_left + 1.5, 14)
    bar(0, 14, 17, 15.5)

    return glyph.resize((size_px, size_px), Image.LANCZOS)
def inject_townhall_sprite(sprites_dir: Path) -> None:
    """Append a townhall glyph to each downloaded sprite sheet.

    Protomaps' v4 sprite omits `townhall` even though the basemap style
    references it; we add the icon here so MapLibre can resolve the name
    natively at runtime.

    For every theme (light/dark) and pixel ratio (1x/2x) whose JSON manifest
    and PNG sheet both exist, the sheet is extended downwards by one glyph
    row and a ``townhall`` entry pointing at it is written to the manifest.
    A sheet whose manifest already has a ``townhall`` entry lying inside the
    image is left untouched, so running this twice on the same files does not
    keep growing the sheet.
    """
    for theme in ("light", "dark"):
        color = _TOWNHALL_COLOR[theme]
        for suffix, scale in (("", 1), ("@2x", 2)):
            json_path = sprites_dir / f"{theme}{suffix}.json"
            png_path = sprites_dir / f"{theme}{suffix}.png"
            if not (json_path.exists() and png_path.exists()):
                continue

            # Read as UTF-8 explicitly rather than in the locale's encoding.
            manifest = json.loads(json_path.read_text(encoding="utf-8"))
            with Image.open(png_path) as source:
                sheet = source.convert("RGBA")

            existing = manifest.get("townhall")
            if (
                isinstance(existing, dict)
                and existing.get("x", 0) + existing.get("width", 0) <= sheet.width
                and existing.get("y", 0) + existing.get("height", 0) <= sheet.height
            ):
                # Already present and inside this sheet: nothing to add. An
                # entry pointing outside the image (stale manifest next to a
                # fresh sheet) falls through and is regenerated.
                continue

            glyph_size = _TOWNHALL_LOGICAL_SIZE * scale
            glyph = _render_townhall_glyph(glyph_size, color)

            extended = Image.new(
                "RGBA",
                (max(sheet.width, glyph_size), sheet.height + glyph_size),
                (0, 0, 0, 0),
            )
            extended.paste(sheet, (0, 0))
            extended.paste(glyph, (0, sheet.height))
            extended.save(png_path, optimize=True)

            manifest["townhall"] = {
                "x": 0,
                "y": sheet.height,
                "width": glyph_size,
                "height": glyph_size,
                "pixelRatio": scale,
            }
            json_path.write_text(json.dumps(manifest), encoding="utf-8")
def main():
    """Download all map assets into ``--output`` and post-process them.

    Fetches font glyph ranges, sprite sheets, twemoji PNGs and branded POI
    icons in parallel, then builds the derived POI icons, normalises the POI
    SVG geometry and adds the townhall glyph to the sprite sheets. Individual
    download failures are counted and reported but do not abort the run.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output directory",
    )
    args = parser.parse_args()
    out: Path = args.output

    twemoji_codes = collect_twemoji_codes()

    # Build download list
    tasks: list[tuple[str, Path]] = []

    # Font glyphs: 256 range files per font stack
    for font in FONT_STACKS:
        font_encoded = font.replace(" ", "%20")
        font_dir = out / "fonts" / font
        for start in range(0, 65536, 256):
            name = f"{start}-{start + 255}.pbf"
            tasks.append((f"{GLYPHS_BASE}/{font_encoded}/{name}", font_dir / name))

    # Sprite sheets (light/dark, 1x and 2x)
    sprites_dir = out / "sprites"
    for theme in ("light", "dark"):
        for suffix in ("json", "png"):
            for ratio in ("", "@2x"):
                file_name = f"{theme}{ratio}.{suffix}"
                tasks.append((f"{SPRITES_BASE}/{file_name}", sprites_dir / file_name))

    # Twemoji PNGs
    twemoji_dir = out / "twemoji"
    for code in twemoji_codes:
        tasks.append((f"{TWEMOJI_BASE}/{code}.png", twemoji_dir / f"{code}.png"))

    # Branded POI icons are served from this local bundle at runtime.
    poi_icons_dir = out / "poi-icons"
    for icon_path in POI_ICON_PATHS:
        tasks.append((f"{POI_ICON_BASE}/{icon_path}", poi_icons_dir / icon_path))

    # Every asset is fetched on each run, replacing any earlier copy: the
    # post-processing steps below rewrite the downloaded files in place.
    print(f"Downloading {len(tasks) + len(DERIVED_POI_ICON_PATHS)} assets")

    ok = 0
    fail = 0
    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = [pool.submit(download_file, url, dest) for url, dest in tasks]
        for future in as_completed(futures):
            success, _url = future.result()
            if success:
                ok += 1
            else:
                fail += 1

    for kind, source_path, dest_path in DERIVED_POI_ICON_PATHS:
        success, _url = download_derived_poi_icon(
            kind, source_path, poi_icons_dir / dest_path
        )
        if success:
            ok += 1
        else:
            fail += 1

    crop_poi_svg_icons(poi_icons_dir)
    inject_townhall_sprite(sprites_dir)
    print(f"Done: {ok} downloaded, {fail} failed")


if __name__ == "__main__":
    main()