Fix map_assets
This commit is contained in:
parent
4506263e5b
commit
a7d528fb68
2 changed files with 109 additions and 1 deletion
108
pipeline/download/map_assets.py
Normal file
108
pipeline/download/map_assets.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import argparse
|
||||
import sys
|
||||
import urllib.request
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
|
||||
|
||||
# Base URL for font glyph range files; main() appends
# "/<font name>/<start>-<end>.pbf" to it.
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
|
||||
# Base URL for Twemoji PNG images; main() appends "/<hex code>.png" to it.
TWEMOJI_BASE = "https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72"
|
||||
|
||||
# Font stacks used by @protomaps/basemaps with lang='en'.
# main() percent-encodes the spaces when building the download URL and uses
# the unencoded name as the local directory name.
|
||||
FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
|
||||
|
||||
# Fallback emoji not in any category; collect_twemoji_codes() adds it to the
# set of emojis whose images are downloaded.
|
||||
_FALLBACK_EMOJIS = ["📍"]
|
||||
|
||||
|
||||
def collect_twemoji_codes() -> list[str]:
    """Return the sorted, de-duplicated Twemoji hex codes for every POI emoji.

    Emojis are gathered from the third field of each four-field entry in
    ``_CATEGORIES``, from the values of ``NAPTAN_EMOJIS`` and from
    ``_FALLBACK_EMOJIS``.

    Only the first code point of each emoji is converted, to match the
    frontend's emojiToTwemojiUrl(), which does
    emoji.codePointAt(0).toString(16).
    """
    symbols: set[str] = {
        symbol for _group, _name, symbol, _osm_keys in _CATEGORIES
    }
    symbols.update(NAPTAN_EMOJIS.values())
    symbols.update(_FALLBACK_EMOJIS)

    # Lower-case hex of the first code point, without any prefix or padding.
    hex_codes = {format(ord(symbol[0]), "x") for symbol in symbols}
    return sorted(hex_codes)
|
||||
|
||||
|
||||
def download_file(url: str, dest: Path) -> tuple[bool, str]:
    """Download *url* to *dest* and report whether it succeeded.

    The payload is first written to a sibling ``<name>.part`` file and is
    renamed onto *dest* only after the transfer has completed, so a failed
    or interrupted download never leaves a truncated file at *dest*.

    Args:
        url: Address of the file to fetch.
        dest: Target path; missing parent directories are created.

    Returns:
        ``(True, url)`` on success, ``(False, url)`` on failure. Download
        failures are reported on stderr instead of being raised.
    """
    # Imported explicitly: the module-level ``import urllib.request`` makes
    # ``urllib.error`` reachable only as a side effect.
    from urllib.error import HTTPError

    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(dest.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial)
        # Rename within the same directory so the final file appears at once.
        partial.replace(dest)
    except HTTPError as e:
        print(f" {e.code} {url}", file=sys.stderr)
    except Exception as e:
        print(f" ERROR {url}: {e}", file=sys.stderr)
    else:
        return True, url

    # Failure path: discard whatever was written before the error.
    partial.unlink(missing_ok=True)
    return False, url
|
||||
|
||||
|
||||
def main():
    """Download the font glyph ranges and Twemoji PNGs into ``--output``.

    Glyph files go to ``<output>/fonts/<font name>/<start>-<end>.pbf`` and
    emoji images to ``<output>/twemoji/<code>.png``. Every asset is
    requested on each run; files that already exist are not skipped.
    Downloads run on a pool of 20 worker threads and a success/failure
    summary is printed at the end.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output directory",
    )
    args = parser.parse_args()
    out: Path = args.output

    twemoji_codes = collect_twemoji_codes()

    # Build download list of (url, destination) pairs
    tasks: list[tuple[str, Path]] = []

    # Font glyphs: 256 range files per font stack (0-255 ... 65280-65535)
    for font in FONT_STACKS:
        # Only the URL is percent-encoded; the local directory keeps spaces.
        font_encoded = font.replace(" ", "%20")
        font_dir = out / "fonts" / font
        for start in range(0, 65536, 256):
            end = start + 255
            name = f"{start}-{end}.pbf"
            url = f"{GLYPHS_BASE}/{font_encoded}/{name}"
            tasks.append((url, font_dir / name))

    # Twemoji PNGs
    twemoji_dir = out / "twemoji"
    for code in twemoji_codes:
        url = f"{TWEMOJI_BASE}/{code}.png"
        tasks.append((url, twemoji_dir / f"{code}.png"))

    # No filtering here: existing files are downloaded again on every run.
    print(f"Downloading {len(tasks)} assets")

    ok = 0
    fail = 0
    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = [pool.submit(download_file, url, dest) for url, dest in tasks]
        for future in as_completed(futures):
            success, _url = future.result()
            if success:
                ok += 1
            else:
                fail += 1

    print(f"Done: {ok} downloaded, {fail} failed")
|
||||
|
||||
|
||||
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue