import argparse
import base64
import json
import re
import sys
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO
from pathlib import Path
from PIL import Image, ImageDraw
from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
# Base URLs of the remote asset collections referenced by this module.
# Font glyph files from the Protomaps basemaps-assets site.
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
# Sprite files (v4) from the Protomaps basemaps-assets site.
SPRITES_BASE = "https://protomaps.github.io/basemaps-assets/sprites/v4"
# Twemoji 72x72 assets, pinned to the 14.0.2 tag on jsDelivr.
TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
# Root of the Geolytix MapIcons site.
POI_ICON_BASE = "https://geolytix.github.io/MapIcons"
# Font stacks used by @protomaps/basemaps with lang='en'
FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
# POI icon files, listed as relative paths.
# NOTE(review): presumably resolved against POI_ICON_BASE and fetched as-is;
# confirm against the code that consumes this list.
POI_ICON_PATHS = [
"brands_2023/supermarkets/farmfoods.svg",
"brands_2023/supermarkets/heron_foods.svg",
"brands_2023/supermarkets/little_waitrose.svg",
"brands_2024/amazon_fresh.svg",
"brands_2024/booths.svg",
"brands_2024/budgens.svg",
"brands_2024/cook.svg",
"brands_2024/dunnes_stores.svg",
"brands_2024/iceland.svg",
"brands_2024/makro.svg",
"brands_2024/mns.svg",
"brands_2024/morrisons_daily.svg",
"brands_2024/sainsburys_local.svg",
"brands_2024/wholefoods.svg",
"logos/aldi.svg",
"logos/asda.svg",
"logos/centra.svg",
"logos/coop.svg",
"logos/lidl.svg",
"logos/morrisons.svg",
"logos/planet_organic.svg",
"logos/sainsburys.svg",
"logos/spar.svg",
"logos/tesco.svg",
"logos/tesco_express.svg",
"logos/tesco_extra.svg",
"logos/waitrose.svg",
"public_transport/london_tube.svg",
"visuals/mns.svg",
]
# Icons described by 3-tuples rather than a single path.
# NOTE(review): presumably (derivation kind, source marker path, output path),
# with the kinds handled by build_costco_logo() and extract_embedded_png()
# respectively; confirm against the code that consumes this list.
DERIVED_POI_ICON_PATHS = [
("costco_logo", "brands/costco.svg", "logos/costco.svg"),
(
"embedded_png",
"brands/iceland_food_warehouse_24px.svg",
"logos/the_food_warehouse.png",
),
]
# Crop rectangle per icon path, stored as a 4-tuple of numbers.
# set_svg_geometry() unpacks a crop as (x, y, width, height) and writes it as
# the SVG viewBox.
# NOTE(review): presumably these values are what is passed to it; confirm
# against the caller.
POI_ICON_SVG_CROPS = {
"brands_2023/supermarkets/farmfoods.svg": (1.293, 7.314, 15.48, 3.293),
"brands_2023/supermarkets/heron_foods.svg": (0.062, 6.68, 17.995, 5.325),
"brands_2023/supermarkets/little_waitrose.svg": (0.916, 5.645, 16.365, 6.719),
"brands_2024/amazon_fresh.svg": (3.817, 1.646, 16.367, 16.358),
"brands_2024/booths.svg": (1.456, 7.143, 15.313, 3.512),
"brands_2024/budgens.svg": (2.251, 2.278, 13.6, 13.612),
"brands_2024/cook.svg": (5.028, 5.493, 13.945, 9.648),
"brands_2024/dunnes_stores.svg": (4.375, 7.732, 15.249, 5.055),
"brands_2024/iceland.svg": (1.136, 6.823, 16.067, 4.302),
"brands_2024/makro.svg": (4.411, 6.098, 16.397, 5.428),
"brands_2024/mns.svg": (4.042, 6.986, 16.171, 6.724),
"brands_2024/morrisons_daily.svg": (3.341, 4.414, 17.317, 8.248),
"brands_2024/sainsburys_local.svg": (4.58, 1.61, 14.84, 14.849),
"brands_2024/wholefoods.svg": (4.17, 2.193, 15.659, 15.668),
"logos/aldi.svg": (4.813, 2.563, 14.374, 14.383),
"logos/asda.svg": (3.91, 7.135, 16.181, 5.442),
"logos/centra.svg": (3.36, 7.35, 17.28, 4.651),
"logos/coop.svg": (6.407, 4.658, 11.187, 11.793),
"logos/costco.svg": (70.61, 144.908, 256.67, 85.825),
"logos/lidl.svg": (4.938, 2.973, 13.985, 13.985),
"logos/morrisons.svg": (5.231, 2.985, 13.538, 13.398),
"logos/planet_organic.svg": (5.528, 3.564, 12.943, 12.943),
"logos/sainsburys.svg": (7.502, 3.572, 8.996, 12.646),
"logos/spar.svg": (4.933, 2.968, 14.133, 13.853),
"logos/tesco.svg": (4.338, 6.865, 15.324, 5.359),
"logos/tesco_express.svg": (5.231, 5.933, 13.538, 8.345),
"logos/tesco_extra.svg": (4.933, 5.775, 14.133, 8.519),
"logos/waitrose.svg": (5.528, 6.09, 12.943, 9.855),
}
# Length svg_intrinsic_size() assigns to the longer side of an icon; the
# shorter side is scaled to keep the aspect ratio (minimum 1).
POI_ICON_SVG_INTRINSIC_MAX = 512
def collect_twemoji_codes() -> list[str]:
    """Return the sorted, de-duplicated twemoji hex codes for all POI emojis.

    Emojis are gathered from the third field of every ``_CATEGORIES`` entry
    and from the values of ``NAPTAN_EMOJIS``. Each one is reduced to the
    lowercase hex of its first code point, mirroring the frontend's
    emojiToTwemojiUrl(), which uses emoji.codePointAt(0).toString(16).
    """
    category_symbols = {symbol for _, _, symbol, _ in _CATEGORIES}
    all_symbols = category_symbols.union(NAPTAN_EMOJIS.values())
    # Only the first code point is used, matching the frontend logic.
    hex_codes = {format(ord(symbol[0]), "x") for symbol in all_symbols}
    return sorted(hex_codes)
def download_file(url: str, dest: Path) -> tuple[bool, str]:
    """Download *url* to *dest*, creating parent directories as needed.

    The payload is written to a sibling ``<name>.part`` file and only moved
    onto *dest* once the transfer has finished, so a failed or interrupted
    download never leaves a truncated file at *dest* and never clobbers a
    copy that was downloaded earlier.

    Download errors are reported on stderr instead of being raised.

    Args:
        url: Location of the resource to fetch.
        dest: Final path of the downloaded file.

    Returns:
        ``(True, url)`` on success, ``(False, url)`` on any download error.
    """
    # Import explicitly instead of relying on ``urllib.request`` having
    # loaded ``urllib.error`` as a side effect.
    from urllib.error import HTTPError

    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(f"{dest.name}.part")
    try:
        urllib.request.urlretrieve(url, partial)
        # Same-directory rename: dest appears only when complete.
        partial.replace(dest)
    except HTTPError as e:
        print(f" {e.code} {url}", file=sys.stderr)
    except Exception as e:
        print(f" ERROR {url}: {e}", file=sys.stderr)
    else:
        return True, url

    try:
        partial.unlink(missing_ok=True)
    except OSError:
        # Best-effort cleanup; the download failure was already reported.
        pass
    return False, url
def download_text(url: str) -> str:
    """Fetch *url* and return the response body decoded as UTF-8 text."""
    with urllib.request.urlopen(url) as reply:
        payload = reply.read()
    return payload.decode("utf-8")
# Derive the Costco logo string from the text of a marker SVG (per the
# function name and error message).
# Looks up a start and an end offset in ``marker_svg`` and raises ValueError
# when either lookup yields a negative offset. The slice from ``start`` to
# ``end + 4`` becomes ``logo_group``; presumably the returned string wraps it.
# NOTE(review): the string literals on the find() line and in the return
# expression look truncated in this copy, and ``end`` is never assigned in the
# visible text -- presumably markup inside the literals was lost. Confirm
# against version control before relying on this function.
def build_costco_logo(marker_svg: str) -> str:
start = marker_svg.find('")
if start < 0 or end < 0:
raise ValueError("Costco marker SVG layout changed")
logo_group = marker_svg[start : end + 4]
return (
'\n'
'\n"
)
def trim_white_png(png_bytes: bytes) -> bytes:
    """Make near-white pixels transparent and crop the image to what is left.

    The image is converted to RGBA. Every pixel whose red, green and blue
    channels are all above 245 gets its alpha set to 0. The image is then
    cropped to the bounding box of the alpha channel; if that box is empty
    the image is left uncropped.

    Returns:
        The resulting image encoded as PNG bytes.
    """
    rgba = Image.open(BytesIO(png_bytes)).convert("RGBA")
    access = rgba.load()
    columns, rows = rgba.width, rgba.height
    for row in range(rows):
        for col in range(columns):
            r, g, b, _alpha = access[col, row]
            # All three channels must exceed the threshold.
            if min(r, g, b) > 245:
                access[col, row] = (r, g, b, 0)

    bounds = rgba.getchannel("A").getbbox()
    trimmed = rgba.crop(bounds) if bounds else rgba

    buffer = BytesIO()
    trimmed.save(buffer, format="PNG")
    return buffer.getvalue()
def extract_embedded_png(marker_svg: str) -> bytes:
    """Decode the first base64 payload in *marker_svg* and trim it.

    The payload is the run of characters after ``base64,`` up to the next
    quote character. It is base64-decoded and passed through
    trim_white_png().

    Raises:
        ValueError: if *marker_svg* contains no ``base64,`` payload.
    """
    found = re.search(r"base64,([^\"']+)", marker_svg)
    if found is None:
        raise ValueError("POI marker SVG did not contain an embedded PNG")
    encoded = found.group(1)
    raw_png = base64.b64decode(encoded)
    return trim_white_png(raw_png)
def svg_intrinsic_size(width: float, height: float) -> tuple[int, int]:
    """Return an integer (width, height) with the given aspect ratio.

    The longer side is set to ``POI_ICON_SVG_INTRINSIC_MAX`` and the shorter
    side is scaled proportionally, rounded, and clamped to at least 1. A
    non-positive *width* or *height* yields a square of the maximum size.
    """
    limit = POI_ICON_SVG_INTRINSIC_MAX

    def scaled(shorter: float, longer: float) -> int:
        # Length of the shorter side once the longer side equals ``limit``.
        return max(1, round(limit * shorter / longer))

    if width <= 0 or height <= 0:
        return (limit, limit)
    if width >= height:
        return (limit, scaled(height, width))
    return (scaled(width, height), limit)
def set_svg_geometry(svg_text: str, crop: tuple[float, float, float, float]) -> str:
x, y, width, height = crop
view_box = f"{x:g} {y:g} {width:g} {height:g}"
intrinsic_width, intrinsic_height = svg_intrinsic_size(width, height)
svg_text = re.sub(r'viewBox="[^"]+"', f'viewBox="{view_box}"', svg_text, count=1)
if 'viewBox="' not in svg_text:
svg_text = re.sub(r"