diff --git a/Taskfile.yml b/Taskfile.yml
index 37bb769..eaaefd2 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -19,7 +19,7 @@ tasks:
       - test -d frontend/public/assets/fonts
       - test -d frontend/public/assets/twemoji
     cmds:
-      - uv run python scripts/download_map_assets.py
+      - uv run python -m pipeline.download.map_assets --output frontend/public/assets
 
   test:
     desc: Run all tests (Python and Rust)
diff --git a/pipeline/download/map_assets.py b/pipeline/download/map_assets.py
new file mode 100644
index 0000000..ec47c8d
--- /dev/null
+++ b/pipeline/download/map_assets.py
@@ -0,0 +1,134 @@
+"""Download map font glyphs and twemoji PNGs into an assets directory."""
+
+import argparse
+import shutil
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
+
+GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
+TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
+
+# Font stacks used by @protomaps/basemaps with lang='en'
+FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
+
+# Fallback emoji not in any category
+_FALLBACK_EMOJIS = ["📍"]
+
+# Seconds a single request may stall before it is counted as failed
+_DOWNLOAD_TIMEOUT = 30.0
+
+
+def collect_twemoji_codes() -> list[str]:
+    """Derive twemoji hex codes from transform_poi categories.
+
+    Matches the frontend's emojiToTwemojiUrl() which does
+    emoji.codePointAt(0).toString(16).
+    """
+    emojis: set[str] = {emoji for _group, _name, emoji, _osm_keys in _CATEGORIES}
+    emojis.update(NAPTAN_EMOJIS.values())
+    emojis.update(_FALLBACK_EMOJIS)
+
+    # First codepoint hex, matching frontend logic; empty strings have no
+    # first codepoint and are skipped rather than raising IndexError.
+    return sorted({f"{ord(e[0]):x}" for e in emojis if e})
+
+
+def _is_downloaded(dest: Path) -> bool:
+    """Return True when dest already holds a non-empty file."""
+    return dest.is_file() and dest.stat().st_size > 0
+
+
+def download_file(
+    url: str, dest: Path, timeout: float = _DOWNLOAD_TIMEOUT
+) -> tuple[bool, str]:
+    """Download a single file.
+
+    The response is streamed to a sibling ".part" file and renamed onto dest
+    only after it completes, so a failed transfer never leaves a truncated
+    asset at dest.
+
+    Args:
+        url: Address to fetch.
+        dest: Final path of the downloaded file; parent dirs are created.
+        timeout: Seconds to wait on a blocking network operation.
+
+    Returns:
+        (success, url). Failures are reported on stderr, not raised.
+    """
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    partial = dest.with_name(dest.name + ".part")
+    try:
+        with urllib.request.urlopen(url, timeout=timeout) as response:
+            with partial.open("wb") as fh:
+                shutil.copyfileobj(response, fh)
+        partial.replace(dest)
+        return True, url
+    except urllib.error.HTTPError as e:
+        print(f" {e.code} {url}", file=sys.stderr)
+    except Exception as e:
+        print(f" ERROR {url}: {e}", file=sys.stderr)
+    # Only reached on failure: drop whatever was partially written.
+    partial.unlink(missing_ok=True)
+    return False, url
+
+
+def main():
+    """Download every font glyph range and twemoji PNG not already on disk."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--output",
+        type=Path,
+        required=True,
+        help="Output directory",
+    )
+    args = parser.parse_args()
+    out: Path = args.output
+
+    twemoji_codes = collect_twemoji_codes()
+
+    # Build download list
+    tasks: list[tuple[str, Path]] = []
+
+    # Font glyphs: 256 range files per font stack
+    for font in FONT_STACKS:
+        font_encoded = urllib.parse.quote(font)
+        font_dir = out / "fonts" / font
+        for start in range(0, 65536, 256):
+            name = f"{start}-{start + 255}.pbf"
+            url = f"{GLYPHS_BASE}/{font_encoded}/{name}"
+            tasks.append((url, font_dir / name))
+
+    # Twemoji PNGs
+    twemoji_dir = out / "twemoji"
+    for code in twemoji_codes:
+        url = f"{TWEMOJI_BASE}/{code}.png"
+        tasks.append((url, twemoji_dir / f"{code}.png"))
+
+    # Skip already-downloaded files
+    remaining = [(url, dest) for url, dest in tasks if not _is_downloaded(dest)]
+    skipped = len(tasks) - len(remaining)
+
+    print(f"Downloading {len(remaining)} assets ({skipped} already present)")
+
+    ok = 0
+    fail = 0
+    with ThreadPoolExecutor(max_workers=20) as pool:
+        futures = [pool.submit(download_file, url, dest) for url, dest in remaining]
+        for future in as_completed(futures):
+            success, _url = future.result()
+            if success:
+                ok += 1
+            else:
+                fail += 1
+
+    print(f"Done: {ok} downloaded, {fail} failed")
+
+
+if __name__ == "__main__":
+    main()