Fix map_assets

This commit is contained in:
Andras Schmelczer 2026-02-07 20:29:34 +00:00
parent 4506263e5b
commit a7d528fb68
2 changed files with 109 additions and 1 deletions

View file

@ -0,0 +1,108 @@
import argparse
import sys
import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
# Base URL of the protomaps font glyph files. Requests are built as
# {GLYPHS_BASE}/{font stack, spaces as %20}/{start}-{end}.pbf
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
# Base URL of the 72x72 Twemoji PNGs, pinned to twemoji 14.0.2. Requests are
# built as {TWEMOJI_BASE}/{hex code}.png
TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
# Font stacks used by @protomaps/basemaps with lang='en'
FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
# Fallback emoji not in any category; always added to the set of emojis whose
# twemoji image is downloaded
_FALLBACK_EMOJIS = ["📍"]
def collect_twemoji_codes() -> list[str]:
    """Return the sorted, de-duplicated twemoji hex codes to download.

    Emojis are gathered from the transform_poi category table, the NaPTAN
    emoji mapping and the local fallback list. Each emoji is reduced to the
    lowercase hex of its first code point, mirroring the frontend's
    emojiToTwemojiUrl(), which does emoji.codePointAt(0).toString(16).
    """
    wanted: set[str] = {emoji for _group, _name, emoji, _osm_keys in _CATEGORIES}
    wanted.update(NAPTAN_EMOJIS.values())
    wanted.update(_FALLBACK_EMOJIS)

    # Only the leading code point is kept, exactly as the frontend does.
    hex_codes = {format(ord(symbol[0]), "x") for symbol in wanted}
    return sorted(hex_codes)
def download_file(url: str, dest: Path, timeout: float = 30.0) -> tuple[bool, str]:
    """Download ``url`` to ``dest`` and report whether it succeeded.

    The payload is streamed into a sibling ``<name>.part`` file and only
    renamed onto ``dest`` once the transfer has finished, so an interrupted
    or failed download never leaves a truncated file at ``dest``.

    Args:
        url: Address of the asset to fetch.
        dest: Target path; missing parent directories are created.
        timeout: Seconds a blocking network operation may take before the
            download is abandoned (prevents a stalled connection from
            occupying a worker forever).

    Returns:
        ``(success, url)``. Every failure (HTTP status, network error,
        filesystem error) is reported on stderr instead of being raised, so
        one bad asset cannot abort a batch of downloads.
    """
    # Function-scope imports: the module header only imports urllib.request.
    import shutil
    import urllib.error

    partial = dest.with_name(dest.name + ".part")
    try:
        dest.parent.mkdir(parents=True, exist_ok=True)
        with urllib.request.urlopen(url, timeout=timeout) as response:
            with open(partial, "wb") as sink:
                shutil.copyfileobj(response, sink)
                received = sink.tell()
            declared = response.headers.get("Content-Length")
        # Same guard urlretrieve applies: a short body is a failed download.
        if declared is not None and received < int(declared):
            raise OSError(
                f"incomplete download: got {received} of {declared} bytes"
            )
        # Atomic on the same filesystem: dest is either absent or complete.
        partial.replace(dest)
    except urllib.error.HTTPError as e:
        print(f" {e.code} {url}", file=sys.stderr)
    except Exception as e:
        print(f" ERROR {url}: {e}", file=sys.stderr)
    else:
        return True, url

    # Best-effort cleanup of the temporary file; the failure itself has
    # already been reported above.
    try:
        partial.unlink(missing_ok=True)
    except OSError:
        pass
    return False, url
def main() -> None:
    """Download the font glyph ranges and twemoji PNGs into ``--output``.

    Glyph files are stored under ``<output>/fonts/<font stack>/`` and emoji
    images under ``<output>/twemoji/``. Assets whose destination file already
    exists are skipped, so the script can be re-run to fetch only what is
    missing. A summary of downloaded and failed assets is printed at the end.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output directory",
    )
    args = parser.parse_args()
    out: Path = args.output

    twemoji_codes = collect_twemoji_codes()

    # Build download list
    tasks: list[tuple[str, Path]] = []

    # Font glyphs: 256 range files per font stack
    for font in FONT_STACKS:
        font_encoded = font.replace(" ", "%20")
        font_dir = out / "fonts" / font
        for start in range(0, 65536, 256):
            end = start + 255
            name = f"{start}-{end}.pbf"
            url = f"{GLYPHS_BASE}/{font_encoded}/{name}"
            tasks.append((url, font_dir / name))

    # Twemoji PNGs
    twemoji_dir = out / "twemoji"
    for code in twemoji_codes:
        url = f"{TWEMOJI_BASE}/{code}.png"
        tasks.append((url, twemoji_dir / f"{code}.png"))

    # Skip already-downloaded files. Existence is the only check performed,
    # so a file known to be truncated must be deleted to be fetched again.
    remaining = [(url, dest) for url, dest in tasks if not dest.exists()]
    print(f"Downloading {len(remaining)} assets")

    ok = 0
    fail = 0
    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = {
            pool.submit(download_file, url, dest): url for url, dest in remaining
        }
        for future in as_completed(futures):
            try:
                success, _url = future.result()
            except Exception as e:
                # An exception escaping a worker counts as one failed asset
                # instead of aborting the whole batch.
                print(f" ERROR {futures[future]}: {e}", file=sys.stderr)
                success = False
            if success:
                ok += 1
            else:
                fail += 1

    print(f"Done: {ok} downloaded, {fail} failed")


if __name__ == "__main__":
    main()