Test changes
This commit is contained in:
parent
4c95815dc8
commit
be02fc16bb
41 changed files with 4224 additions and 759 deletions
|
|
@ -1,9 +1,15 @@
|
|||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from pipeline.transform.transform_poi import NAPTAN_EMOJIS, _CATEGORIES
|
||||
|
||||
GLYPHS_BASE = "https://protomaps.github.io/basemaps-assets/fonts"
|
||||
|
|
@ -14,53 +20,80 @@ POI_ICON_BASE = "https://geolytix.github.io/MapIcons"
|
|||
# Font stacks used by @protomaps/basemaps with lang='en'
|
||||
FONT_STACKS = ["Noto Sans Regular", "Noto Sans Italic", "Noto Sans Medium"]
|
||||
|
||||
# Fallback emoji not in any category
|
||||
_FALLBACK_EMOJIS = ["📍"]
|
||||
|
||||
POI_ICON_PATHS = [
|
||||
"asda/asda_express_24px.svg",
|
||||
"asda/asda_green_basket_24px.svg",
|
||||
"asda/asda_green_trolley_24px.svg",
|
||||
"asda/asda_living_24px.svg",
|
||||
"asda/asda_pfs_24px.svg",
|
||||
"asda/asda_primary.svg",
|
||||
"asda/asda_superstore_green_trolley_24px.svg",
|
||||
"brands/aldi_24px.svg",
|
||||
"brands/amazon_fresh_alt_24px.svg",
|
||||
"brands/booths_24px.svg",
|
||||
"brands/budgens_24px.svg",
|
||||
"brands/centra_24px.svg",
|
||||
"brands/cook.svg",
|
||||
"brands/coop_24px.svg",
|
||||
"brands/costco_24px.svg",
|
||||
"brands/dunnes_stores_24px.svg",
|
||||
"brands/farmfoods_updated_24px.svg",
|
||||
"brands/heron_24px.svg",
|
||||
"brands/iceland_24px.svg",
|
||||
"brands/iceland_food_warehouse_24px.svg",
|
||||
"brands/lidl_24px.svg",
|
||||
"brands/little_waitrose_24px.svg",
|
||||
"brands/makro_24px.svg",
|
||||
"brands/mns_24px.svg",
|
||||
"brands/mns_food_24px.svg",
|
||||
"brands/mns_high_street_24px.svg",
|
||||
"brands/mns_hospital_24px.svg",
|
||||
"brands/mns_moto_24px.svg",
|
||||
"brands/mns_outlet_24px.svg",
|
||||
"brands/morrisons_24px.svg",
|
||||
"brands/morrisons_daily_24px.svg",
|
||||
"brands/sainsburys_24px.svg",
|
||||
"brands/sainsburys_local_24px.svg",
|
||||
"brands/spar_24px.svg",
|
||||
"brands/tesco_24px.svg",
|
||||
"brands/tesco_express_24px.svg",
|
||||
"brands/tesco_extra_24px.svg",
|
||||
"brands/waitrose_24px.svg",
|
||||
"brands/wholefoods_24px.svg",
|
||||
"logos/planet_organic_24px.svg",
|
||||
"brands_2023/supermarkets/farmfoods.svg",
|
||||
"brands_2023/supermarkets/heron_foods.svg",
|
||||
"brands_2023/supermarkets/little_waitrose.svg",
|
||||
"brands_2024/amazon_fresh.svg",
|
||||
"brands_2024/booths.svg",
|
||||
"brands_2024/budgens.svg",
|
||||
"brands_2024/cook.svg",
|
||||
"brands_2024/dunnes_stores.svg",
|
||||
"brands_2024/iceland.svg",
|
||||
"brands_2024/makro.svg",
|
||||
"brands_2024/mns.svg",
|
||||
"brands_2024/morrisons_daily.svg",
|
||||
"brands_2024/sainsburys_local.svg",
|
||||
"brands_2024/wholefoods.svg",
|
||||
"logos/aldi.svg",
|
||||
"logos/asda.svg",
|
||||
"logos/centra.svg",
|
||||
"logos/coop.svg",
|
||||
"logos/lidl.svg",
|
||||
"logos/morrisons.svg",
|
||||
"logos/planet_organic.svg",
|
||||
"logos/sainsburys.svg",
|
||||
"logos/spar.svg",
|
||||
"logos/tesco.svg",
|
||||
"logos/tesco_express.svg",
|
||||
"logos/tesco_extra.svg",
|
||||
"logos/waitrose.svg",
|
||||
"public_transport/london_tube.svg",
|
||||
"visuals/mns.svg",
|
||||
]
|
||||
|
||||
DERIVED_POI_ICON_PATHS = [
|
||||
("costco_logo", "brands/costco.svg", "logos/costco.svg"),
|
||||
(
|
||||
"embedded_png",
|
||||
"brands/iceland_food_warehouse_24px.svg",
|
||||
"logos/the_food_warehouse.png",
|
||||
),
|
||||
]
|
||||
|
||||
POI_ICON_SVG_CROPS = {
|
||||
"brands_2023/supermarkets/farmfoods.svg": (1.293, 7.314, 15.48, 3.293),
|
||||
"brands_2023/supermarkets/heron_foods.svg": (0.062, 6.68, 17.995, 5.325),
|
||||
"brands_2023/supermarkets/little_waitrose.svg": (0.916, 5.645, 16.365, 6.719),
|
||||
"brands_2024/amazon_fresh.svg": (3.817, 1.646, 16.367, 16.358),
|
||||
"brands_2024/booths.svg": (1.456, 7.143, 15.313, 3.512),
|
||||
"brands_2024/budgens.svg": (2.251, 2.278, 13.6, 13.612),
|
||||
"brands_2024/cook.svg": (5.028, 5.493, 13.945, 9.648),
|
||||
"brands_2024/dunnes_stores.svg": (4.375, 7.732, 15.249, 5.055),
|
||||
"brands_2024/iceland.svg": (1.136, 6.823, 16.067, 4.302),
|
||||
"brands_2024/makro.svg": (4.411, 6.098, 16.397, 5.428),
|
||||
"brands_2024/mns.svg": (4.042, 6.986, 16.171, 6.724),
|
||||
"brands_2024/morrisons_daily.svg": (3.341, 4.414, 17.317, 8.248),
|
||||
"brands_2024/sainsburys_local.svg": (4.58, 1.61, 14.84, 14.849),
|
||||
"brands_2024/wholefoods.svg": (4.17, 2.193, 15.659, 15.668),
|
||||
"logos/aldi.svg": (4.813, 2.563, 14.374, 14.383),
|
||||
"logos/asda.svg": (3.91, 7.135, 16.181, 5.442),
|
||||
"logos/centra.svg": (3.36, 7.35, 17.28, 4.651),
|
||||
"logos/coop.svg": (6.407, 4.658, 11.187, 11.793),
|
||||
"logos/costco.svg": (70.61, 144.908, 256.67, 85.825),
|
||||
"logos/lidl.svg": (4.938, 2.973, 13.985, 13.985),
|
||||
"logos/morrisons.svg": (5.231, 2.985, 13.538, 13.398),
|
||||
"logos/planet_organic.svg": (5.528, 3.564, 12.943, 12.943),
|
||||
"logos/sainsburys.svg": (7.502, 3.572, 8.996, 12.646),
|
||||
"logos/spar.svg": (4.933, 2.968, 14.133, 13.853),
|
||||
"logos/tesco.svg": (4.338, 6.865, 15.324, 5.359),
|
||||
"logos/tesco_express.svg": (5.231, 5.933, 13.538, 8.345),
|
||||
"logos/tesco_extra.svg": (4.933, 5.775, 14.133, 8.519),
|
||||
"logos/waitrose.svg": (5.528, 6.09, 12.943, 9.855),
|
||||
}
|
||||
|
||||
POI_ICON_SVG_INTRINSIC_MAX = 512
|
||||
|
||||
|
||||
def collect_twemoji_codes() -> list[str]:
|
||||
"""Derive twemoji hex codes from transform_poi categories.
|
||||
|
|
@ -76,9 +109,6 @@ def collect_twemoji_codes() -> list[str]:
|
|||
for emoji in NAPTAN_EMOJIS.values():
|
||||
emojis.add(emoji)
|
||||
|
||||
for emoji in _FALLBACK_EMOJIS:
|
||||
emojis.add(emoji)
|
||||
|
||||
# First codepoint hex, matching frontend logic
|
||||
return sorted({f"{ord(e[0]):x}" for e in emojis})
|
||||
|
||||
|
|
@ -97,6 +127,214 @@ def download_file(url: str, dest: Path) -> tuple[bool, str]:
|
|||
return False, url
|
||||
|
||||
|
||||
def download_text(url: str) -> str:
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Any error raised by ``urllib`` while opening or reading the URL, or by
    the UTF-8 decode, propagates to the caller.
    """
    response = urllib.request.urlopen(url)
    with response:
        payload = response.read()
        return payload.decode("utf-8")
|
||||
|
||||
|
||||
def build_costco_logo(marker_svg: str) -> str:
    """Lift the logo group out of the Costco marker SVG into its own SVG.

    The group is located by its hard-coded opening path data and by the
    closing ``</g></g></svg>`` tail of the marker document.

    Args:
        marker_svg: Text of the upstream Costco marker SVG.

    Returns:
        A standalone SVG document (fixed ``70 145 260 90`` viewBox) that
        wraps the extracted group.

    Raises:
        ValueError: If either anchor string is missing from ``marker_svg``.
    """
    group_open = marker_svg.find('<g><path d=" M 316.312')
    tail_at = marker_svg.rfind("</g></g></svg>")
    if group_open < 0 or tail_at < 0:
        raise ValueError("Costco marker SVG layout changed")

    # Keep only the first "</g>" of the tail: it closes the logo group itself,
    # while the remaining "</g></svg>" belongs to the marker wrapper.
    logo_markup = marker_svg[group_open : tail_at + len("</g>")]

    pieces = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<svg xmlns="http://www.w3.org/2000/svg" viewBox="70 145 260 90" ',
        'width="260pt" height="90pt" preserveAspectRatio="xMidYMid meet">\n',
        f"{logo_markup}\n",
        "</svg>\n",
    ]
    return "".join(pieces)
|
||||
|
||||
|
||||
def trim_white_png(png_bytes: bytes) -> bytes:
    """Make near-white pixels transparent and crop to the visible area.

    A pixel counts as near-white when red, green and blue are all above 245;
    its colour is kept and only its alpha is set to 0. The image is then
    cropped to the bounding box of the alpha channel (when there is one)
    and re-encoded as PNG.

    Args:
        png_bytes: Encoded source image.

    Returns:
        The processed image encoded as PNG bytes.
    """
    canvas = Image.open(BytesIO(png_bytes)).convert("RGBA")
    px = canvas.load()
    width, height = canvas.width, canvas.height

    for row in range(height):
        for col in range(width):
            r, g, b, _alpha = px[col, row]
            if min(r, g, b) > 245:
                px[col, row] = (r, g, b, 0)

    visible_box = canvas.getchannel("A").getbbox()
    # getbbox() yields nothing for a fully transparent image; keep it uncropped.
    trimmed = canvas.crop(visible_box) if visible_box else canvas

    buffer = BytesIO()
    trimmed.save(buffer, format="PNG")
    return buffer.getvalue()
|
||||
|
||||
|
||||
def extract_embedded_png(marker_svg: str) -> bytes:
    """Decode the first base64 payload in ``marker_svg`` and trim it.

    The payload is everything after the first ``base64,`` up to the next
    single or double quote. The decoded bytes are passed through
    ``trim_white_png``.

    Raises:
        ValueError: If no ``base64,`` payload is present.
    """
    found = re.search(r"base64,([^\"']+)", marker_svg)
    if found is None:
        raise ValueError("POI marker SVG did not contain an embedded PNG")

    encoded = found.group(1)
    raw_png = base64.b64decode(encoded)
    return trim_white_png(raw_png)
|
||||
|
||||
|
||||
def svg_intrinsic_size(width: float, height: float) -> tuple[int, int]:
    """Scale a ``width`` x ``height`` box so its longer side is the maximum.

    The longer side becomes ``POI_ICON_SVG_INTRINSIC_MAX``; the shorter side
    is scaled proportionally, rounded, and never drops below 1. A box with a
    non-positive side yields a square of the maximum size.

    Returns:
        ``(width, height)`` in whole pixels.
    """
    cap = POI_ICON_SVG_INTRINSIC_MAX
    if width <= 0 or height <= 0:
        return (cap, cap)

    landscape = width >= height
    short_side, long_side = (height, width) if landscape else (width, height)
    scaled = max(1, round(cap * short_side / long_side))
    return (cap, scaled) if landscape else (scaled, cap)
|
||||
|
||||
|
||||
def set_svg_geometry(svg_text: str, crop: tuple[float, float, float, float]) -> str:
    """Set ``viewBox``, ``width`` and ``height`` on the root ``<svg>`` tag.

    ``viewBox`` is set to ``crop``; ``width``/``height`` are set to the pixel
    size returned by ``svg_intrinsic_size`` for the crop's width and height.
    An attribute that already exists on the root tag is overwritten in place;
    a missing one is inserted directly after ``<svg``.

    Only the first ``<svg ...>`` opening tag is edited, and attribute names
    are matched whole. This keeps ``stroke-width="..."`` and the
    ``width``/``height`` of child elements (``<rect>``, ``<image>``, ...)
    untouched, including when the root tag itself lacks the attribute.

    Args:
        svg_text: SVG document text.
        crop: ``(x, y, width, height)`` of the desired view box.

    Returns:
        The updated document, or ``svg_text`` unchanged when it contains no
        ``<svg`` opening tag.
    """
    x, y, width, height = crop
    view_box = f"{x:g} {y:g} {width:g} {height:g}"
    intrinsic_width, intrinsic_height = svg_intrinsic_size(width, height)

    root = re.search(r"<svg\b[^>]*>", svg_text)
    if root is None:
        return svg_text

    tag = root.group(0)
    for attr, value in (
        ("viewBox", view_box),
        ("width", intrinsic_width),
        ("height", intrinsic_height),
    ):
        assignment = f'{attr}="{value}"'
        # The lookbehind rejects longer names that merely end in the attribute
        # name (stroke-width, xlink:height, ...).
        tag, replaced = re.subn(
            rf'(?<![\w:.-]){attr}="[^"]*"', assignment, tag, count=1
        )
        if not replaced:
            tag = re.sub(r"<svg\b", f"<svg {assignment}", tag, count=1)

    return svg_text[: root.start()] + tag + svg_text[root.end() :]
|
||||
|
||||
|
||||
def get_svg_view_box(svg_text: str) -> tuple[float, float, float, float] | None:
|
||||
match = re.search(r'viewBox="([^"]+)"', svg_text)
|
||||
if not match:
|
||||
return None
|
||||
parts = [
|
||||
float(part) for part in re.split(r"[\s,]+", match.group(1).strip()) if part
|
||||
]
|
||||
if len(parts) != 4:
|
||||
return None
|
||||
return (parts[0], parts[1], parts[2], parts[3])
|
||||
|
||||
|
||||
def crop_poi_svg_icons(poi_icons_dir: Path) -> None:
    """Rewrite the geometry of downloaded POI SVG icons in place.

    Pass 1 applies the crop listed in ``POI_ICON_SVG_CROPS`` to every listed
    icon that exists on disk. Pass 2 visits every ``*.svg`` below
    ``poi_icons_dir`` and re-applies its current view box through
    ``set_svg_geometry``, which also rewrites its width and height.
    """
    for relative_path, crop_box in POI_ICON_SVG_CROPS.items():
        target = poi_icons_dir / relative_path
        if target.exists():
            markup = target.read_text(encoding="utf-8")
            if relative_path == "brands_2024/dunnes_stores.svg":
                # Recolour the two near-white fills of this one icon.
                for near_white in ('fill="#fffcfc"', 'fill="#fcfcfc"'):
                    markup = markup.replace(near_white, 'fill="#111111"')
            target.write_text(set_svg_geometry(markup, crop_box), encoding="utf-8")

    for svg_file in poi_icons_dir.rglob("*.svg"):
        markup = svg_file.read_text(encoding="utf-8")
        current_box = get_svg_view_box(markup)
        if not current_box:
            continue
        svg_file.write_text(set_svg_geometry(markup, current_box), encoding="utf-8")
|
||||
|
||||
|
||||
def download_derived_poi_icon(
    kind: str, source_path: str, dest: Path
) -> tuple[bool, str]:
    """Download a marker SVG and write an icon derived from it to ``dest``.

    Args:
        kind: ``"costco_logo"`` (write the SVG built by ``build_costco_logo``)
            or ``"embedded_png"`` (write the PNG from ``extract_embedded_png``).
        source_path: Path of the source SVG below ``POI_ICON_BASE``.
        dest: Output file; its parent directories are created.

    Returns:
        ``(success, url)``. Every failure, including an unknown ``kind``, is
        reported on stderr and returned as ``(False, url)`` rather than raised.
    """
    url = f"{POI_ICON_BASE}/{source_path}"
    dest.parent.mkdir(parents=True, exist_ok=True)

    try:
        marker_svg = download_text(url)
        if kind == "embedded_png":
            dest.write_bytes(extract_embedded_png(marker_svg))
        elif kind == "costco_logo":
            dest.write_text(build_costco_logo(marker_svg), encoding="utf-8")
        else:
            raise ValueError(f"Unknown derived POI icon kind: {kind}")
    except urllib.error.HTTPError as http_error:
        print(f" {http_error.code} {url}", file=sys.stderr)
        return False, url
    except Exception as error:
        print(f" ERROR {url}: {error}", file=sys.stderr)
        return False, url

    return True, url
|
||||
|
||||
|
||||
# Slategray accent used by civic POI icons (school, library, building, …) in
|
||||
# protomaps' v4 sprite. We match it so the townhall blends in with its peers.
|
||||
_TOWNHALL_COLOR = {
|
||||
"light": (135, 128, 171),
|
||||
"dark": (118, 118, 127),
|
||||
}
|
||||
_TOWNHALL_LOGICAL_SIZE = 17
|
||||
|
||||
|
||||
def _render_townhall_glyph(size_px: int, color: tuple[int, int, int]) -> Image.Image:
    """Draw the townhall glyph as a ``size_px`` x ``size_px`` RGBA image.

    Coordinates are given on a ``_TOWNHALL_LOGICAL_SIZE`` grid. The shape is
    drawn at 8x the target size and shrunk with Lanczos resampling so the
    pediment's diagonals come out anti-aliased; PIL's polygon fill on its own
    is aliased.
    """
    oversample = 8
    side = size_px * oversample
    surface = Image.new("RGBA", (side, side), (0, 0, 0, 0))
    pen = ImageDraw.Draw(surface)
    opaque = (*color, 255)

    def to_px(value: float) -> float:
        # Logical grid units -> oversampled pixel coordinates.
        return value * side / _TOWNHALL_LOGICAL_SIZE

    def bar(left: float, top: float, right: float, bottom: float) -> None:
        pen.rectangle([(to_px(left), to_px(top)), (to_px(right), to_px(bottom))], fill=opaque)

    # Pediment (triangle), then the beam beneath it.
    pediment = ((8.5, 1), (15, 6.5), (2, 6.5))
    pen.polygon([(to_px(px), to_px(py)) for px, py in pediment], fill=opaque)
    bar(1, 6.5, 16, 8.5)

    # Three columns, then the base slab.
    for column_left in (3, 8, 13):
        bar(column_left, 8.5, column_left + 1.5, 14)
    bar(0, 14, 17, 15.5)

    return surface.resize((size_px, size_px), Image.LANCZOS)
|
||||
|
||||
|
||||
def inject_townhall_sprite(sprites_dir: Path) -> None:
    """Append a townhall glyph to each downloaded sprite sheet.

    Protomaps' v4 sprite omits `townhall` even though the basemap style
    references it; we add the icon here so MapLibre can resolve the name
    natively at runtime.

    For every ``{light,dark}{,@2x}`` sheet whose ``.json`` and ``.png`` both
    exist, the PNG is extended downwards by one glyph-sized row, the glyph is
    pasted at the left of that row, and a ``townhall`` entry is added to the
    manifest. Sheets whose manifest already defines ``townhall`` are left
    untouched, so running this again over already-processed files does not
    keep growing the PNG.
    """
    for theme in ("light", "dark"):
        color = _TOWNHALL_COLOR[theme]
        for suffix, scale in (("", 1), ("@2x", 2)):
            json_path = sprites_dir / f"{theme}{suffix}.json"
            png_path = sprites_dir / f"{theme}{suffix}.png"
            if not json_path.exists() or not png_path.exists():
                continue

            manifest = json.loads(json_path.read_text(encoding="utf-8"))
            if "townhall" in manifest:
                # Already present (earlier run, or provided by the sheet itself).
                continue

            sheet = Image.open(png_path).convert("RGBA")

            glyph_size = _TOWNHALL_LOGICAL_SIZE * scale
            glyph = _render_townhall_glyph(glyph_size, color)

            # Grow the sheet by one row below the existing icons.
            new_width = max(sheet.width, glyph_size)
            new_height = sheet.height + glyph_size
            extended = Image.new("RGBA", (new_width, new_height), (0, 0, 0, 0))
            extended.paste(sheet, (0, 0))
            extended.paste(glyph, (0, sheet.height))
            extended.save(png_path, optimize=True)

            manifest["townhall"] = {
                "x": 0,
                "y": sheet.height,
                "width": glyph_size,
                "height": glyph_size,
                "pixelRatio": scale,
            }
            json_path.write_text(json.dumps(manifest), encoding="utf-8")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
|
|
@ -147,7 +385,7 @@ def main():
|
|||
# Skip already-downloaded files
|
||||
remaining = [(url, dest) for url, dest in tasks]
|
||||
|
||||
print(f"Downloading {len(remaining)} assets")
|
||||
print(f"Downloading {len(remaining) + len(DERIVED_POI_ICON_PATHS)} assets")
|
||||
|
||||
ok = 0
|
||||
fail = 0
|
||||
|
|
@ -162,6 +400,18 @@ def main():
|
|||
else:
|
||||
fail += 1
|
||||
|
||||
for kind, source_path, dest_path in DERIVED_POI_ICON_PATHS:
|
||||
success, _url = download_derived_poi_icon(
|
||||
kind, source_path, poi_icons_dir / dest_path
|
||||
)
|
||||
if success:
|
||||
ok += 1
|
||||
else:
|
||||
fail += 1
|
||||
|
||||
crop_poi_svg_icons(poi_icons_dir)
|
||||
inject_townhall_sprite(sprites_dir)
|
||||
|
||||
print(f"Done: {ok} downloaded, {fail} failed")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Reuses the same england-latest.osm.pbf as pois.py.
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import osmium
|
||||
|
|
@ -44,11 +45,37 @@ _STATION_STRIP = (
|
|||
" underground station",
|
||||
" railway station",
|
||||
" dlr station",
|
||||
" station dlr",
|
||||
" dlr",
|
||||
" overground station",
|
||||
" tram stop",
|
||||
" station",
|
||||
)
|
||||
|
||||
_DLR_CODE_RE = re.compile(r"ZZDL([A-Z0-9]{3})")
|
||||
|
||||
|
||||
def _is_dlr_station(tags: dict[str, str]) -> bool:
|
||||
name = tags.get("name", "").lower()
|
||||
network = tags.get("network", "").lower()
|
||||
operator = tags.get("operator", "").lower()
|
||||
return (
|
||||
"docklands" in network
|
||||
or "dlr" in network
|
||||
or "docklands" in operator
|
||||
or "dlr" in operator
|
||||
or name.endswith(" dlr")
|
||||
or " dlr " in name
|
||||
)
|
||||
|
||||
|
||||
def _is_tram_station(tags: dict[str, str]) -> bool:
    """Return True when the tags describe a tram stop rather than a station.

    DLR stations are never trams, even though they carry
    ``station=light_rail``. Otherwise a node is a tram stop when it is tagged
    ``station=light_rail`` or its ``network`` mentions "tramlink" or "tram"
    (case-insensitive).
    """
    if _is_dlr_station(tags):
        return False
    if tags.get("station", "") == "light_rail":
        return True
    network = tags.get("network", "").lower()
    return any(marker in network for marker in ("tramlink", "tram"))
|
||||
|
||||
|
||||
def _station_display_name(name: str, tags: dict[str, str]) -> str:
|
||||
"""Build a descriptive station name like 'Bank tube station'."""
|
||||
|
|
@ -78,6 +105,96 @@ def _station_display_name(name: str, tags: dict[str, str]) -> str:
|
|||
return f"{name} {suffix}"
|
||||
|
||||
|
||||
def _station_name_score(name: str) -> tuple[int, int]:
|
||||
lower = name.lower()
|
||||
suffix_penalty = int(
|
||||
lower.endswith(
|
||||
(
|
||||
" underground station",
|
||||
" tube station",
|
||||
" dlr station",
|
||||
" railway station",
|
||||
" rail station",
|
||||
" station dlr",
|
||||
" station",
|
||||
)
|
||||
)
|
||||
or lower.endswith(" dlr")
|
||||
)
|
||||
return (suffix_penalty, len(name))
|
||||
|
||||
|
||||
def _naptan_dlr_stations(naptan_path: Path) -> list[dict]:
    """Extract station-level DLR destinations from NaPTAN access nodes.

    Rows are kept when their ``id`` contains ``ZZDL`` followed by a
    three-character code and their ``category`` is "Tube station" or
    "Rail station". Rows sharing a code are merged into one station: the
    position is the mean of the rows' coordinates and the name is the raw
    name with the lowest ``_station_name_score``. Rows with an empty name or
    without coordinates are ignored.

    Args:
        naptan_path: Parquet file with at least the columns ``id``, ``name``,
            ``category``, ``lat`` and ``lng``.

    Returns:
        Place dicts (``name``, ``place_type``, ``lat``, ``lon``,
        ``population``, ``travel_destination``) sorted by display name.

    Raises:
        ValueError: If a required column is missing.
    """
    df = pl.read_parquet(naptan_path)
    required = {"id", "name", "category", "lat", "lng"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"NaPTAN file is missing columns: {sorted(missing)}")

    rows: dict[str, dict] = {}
    for row in df.iter_rows(named=True):
        atco_id = str(row["id"] or "")
        match = _DLR_CODE_RE.search(atco_id)
        if not match:
            continue
        if row["category"] not in {"Tube station", "Rail station"}:
            continue

        code = match.group(1)
        raw_name = str(row["name"] or "")
        if not raw_name:
            continue
        # Null coordinates cannot contribute to the mean position; skip the
        # row, like rows with a null id or name, instead of failing in float().
        if row["lat"] is None or row["lng"] is None:
            continue

        lat = float(row["lat"])
        lon = float(row["lng"])
        current = rows.get(code)
        if current is None:
            rows[code] = {
                "raw_name": raw_name,
                "lat_sum": lat,
                "lon_sum": lon,
                "count": 1,
            }
            continue

        current["lat_sum"] += lat
        current["lon_sum"] += lon
        current["count"] += 1
        if _station_name_score(raw_name) < _station_name_score(current["raw_name"]):
            current["raw_name"] = raw_name

    stations = []
    for station in rows.values():
        count = station["count"]
        display_name = _station_display_name(station["raw_name"], {"network": "DLR"})
        stations.append(
            {
                "name": display_name,
                "place_type": "station",
                "lat": station["lat_sum"] / count,
                "lon": station["lon_sum"] / count,
                "population": 0,
                "travel_destination": True,
            }
        )

    return sorted(stations, key=lambda station: station["name"])
|
||||
|
||||
|
||||
def _append_naptan_dlr_stations(places: list[dict], naptan_path: Path) -> int:
    """Append NaPTAN DLR stations whose names are not already in ``places``.

    Names are compared case-insensitively (``casefold``). ``places`` is
    extended in place.

    Returns:
        The number of stations appended.
    """
    seen_names = {str(place["name"]).casefold() for place in places}
    size_before = len(places)
    for candidate in _naptan_dlr_stations(naptan_path):
        folded = candidate["name"].casefold()
        if folded not in seen_names:
            seen_names.add(folded)
            places.append(candidate)
    return len(places) - size_before
|
||||
|
||||
|
||||
class PlaceHandler(osmium.SimpleHandler):
|
||||
def __init__(self, progress: tqdm, england_polygon) -> None:
|
||||
super().__init__()
|
||||
|
|
@ -145,14 +262,7 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
# Railway stations (tube, national rail, DLR, overground, Elizabeth line)
|
||||
if n.tags.get("railway") == "station":
|
||||
tags = dict(n.tags)
|
||||
station_tag = tags.get("station", "")
|
||||
network = tags.get("network", "").lower()
|
||||
# Skip tram stops
|
||||
if (
|
||||
station_tag == "light_rail"
|
||||
or "tramlink" in network
|
||||
or "tram" in network
|
||||
):
|
||||
if _is_tram_station(tags):
|
||||
return
|
||||
display_name = _station_display_name(name, tags)
|
||||
self._add(
|
||||
|
|
@ -178,6 +288,11 @@ def main() -> None:
|
|||
required=True,
|
||||
help="England boundary GeoJSON file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--naptan",
|
||||
type=Path,
|
||||
help="Optional NaPTAN parquet file used to add DLR station destinations",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
pbf_file = args.pbf
|
||||
|
|
@ -195,6 +310,9 @@ def main() -> None:
|
|||
handler.apply_file(str(pbf_file), locations=True)
|
||||
|
||||
print(f"Extracted {len(handler.places):,} place nodes")
|
||||
if args.naptan:
|
||||
added = _append_naptan_dlr_stations(handler.places, args.naptan)
|
||||
print(f"Added {added:,} DLR station destinations from NaPTAN")
|
||||
|
||||
if handler.places:
|
||||
df = pl.DataFrame(handler.places)
|
||||
|
|
|
|||
81
pipeline/download/test_places.py
Normal file
81
pipeline/download/test_places.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
import polars as pl
|
||||
|
||||
from pipeline.download.places import (
|
||||
_is_dlr_station,
|
||||
_is_tram_station,
|
||||
_naptan_dlr_stations,
|
||||
_station_display_name,
|
||||
)
|
||||
|
||||
|
||||
def test_dlr_light_rail_is_not_treated_as_tram():
    """A light_rail station on the DLR network is a station, not a tram stop."""
    lewisham_tags = dict(
        name="Lewisham DLR",
        railway="station",
        station="light_rail",
        network="Docklands Light Railway",
    )

    assert _is_dlr_station(lewisham_tags)
    assert not _is_tram_station(lewisham_tags)

    # raw name -> expected display name
    expected_display = {
        "Lewisham DLR": "Lewisham DLR station",
        "Tower Gateway Station DLR": "Tower Gateway DLR station",
    }
    for raw_name, shown in expected_display.items():
        assert _station_display_name(raw_name, lewisham_tags) == shown
|
||||
|
||||
|
||||
def test_tram_light_rail_is_still_excluded():
    """A light_rail station on a tram network is still classified as a tram."""
    croydon_tags = dict(
        name="East Croydon",
        railway="station",
        station="light_rail",
        network="London Trams",
    )

    is_dlr = _is_dlr_station(croydon_tags)
    is_tram = _is_tram_station(croydon_tags)

    assert not is_dlr
    assert is_tram
|
||||
|
||||
|
||||
def test_naptan_dlr_stations_are_deduplicated_by_atco_code(tmp_path):
    """Rows sharing a ZZDL code merge into one station; other rows are dropped."""
    columns = ("id", "name", "category", "lat", "lng")
    fixture_rows = [
        ("4900ZZDLSHA3", "Shadwell DLR", "Tube station", 51.51156, -0.055595),
        ("9400ZZDLSHA", "Shadwell DLR Station", "Tube station", 51.511693, -0.056643),
        ("4900ZZDLGRE1", "Greenwich Station", "Rail station", 51.47794, -0.01442),
        # No ZZDL code and not a station category.
        ("490002076RV", "Tower Gateway Station DLR", "Bus stop", 51.510575, -0.07514),
        # Tube station, but its id has no ZZDL code.
        ("4900ZZLUBNK", "Bank", "Tube station", 51.5131, -0.0894),
    ]
    parquet_path = tmp_path / "naptan.parquet"
    pl.DataFrame(
        {
            column: [row[index] for row in fixture_rows]
            for index, column in enumerate(columns)
        }
    ).write_parquet(parquet_path)

    stations = _naptan_dlr_stations(parquet_path)

    names = [station["name"] for station in stations]
    assert names == [
        "Greenwich DLR station",
        "Shadwell DLR station",
    ]

    shadwell = next(
        station for station in stations if station["name"].startswith("Shadwell")
    )
    assert shadwell["lat"] == (51.51156 + 51.511693) / 2
    assert shadwell["place_type"] == "station"
    assert shadwell["travel_destination"] is True
|
||||
|
|
@ -56,6 +56,7 @@ NR_AUTH_URL = "https://opendata.nationalrail.co.uk/authenticate"
|
|||
NR_TIMETABLE_URL = "https://opendata.nationalrail.co.uk/api/staticfeeds/3.0/timetable"
|
||||
|
||||
USER_AGENT = "property-map-pipeline/1.0 (https://github.com)"
|
||||
TRANSXCHANGE2GTFS_PACKAGE = "transxchange2gtfs@1.12.0"
|
||||
|
||||
|
||||
def _download_http(
|
||||
|
|
@ -473,10 +474,50 @@ def convert_tfl_to_gtfs(raw_dir: Path, output_dir: Path) -> Path:
|
|||
download_naptan()
|
||||
|
||||
print("Converting TfL TransXChange → GTFS...")
|
||||
# The shim patches known packaging/runtime issues in the pinned npm package
|
||||
# before loading its CLI from npx's temporary install.
|
||||
shim_path = Path(__file__).with_name("transxchange2gtfs_shim.js")
|
||||
subprocess.run(
|
||||
["npx", "--yes", "transxchange2gtfs", str(txc_path), str(dest)],
|
||||
[
|
||||
"npx",
|
||||
"--yes",
|
||||
"--package",
|
||||
TRANSXCHANGE2GTFS_PACKAGE,
|
||||
"sh",
|
||||
"-c",
|
||||
"\n".join(
|
||||
[
|
||||
'bin="$(command -v transxchange2gtfs)"',
|
||||
'script="$(readlink -f "$bin")"',
|
||||
'pkg_dir="$(dirname "$(dirname "$script")")"',
|
||||
'shim="$1"',
|
||||
"shift",
|
||||
'exec node "$shim" "$pkg_dir" "$@"',
|
||||
]
|
||||
),
|
||||
"transxchange2gtfs",
|
||||
str(shim_path.resolve()),
|
||||
str(txc_path.resolve()),
|
||||
str(dest.resolve()),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
required_files = {
|
||||
"agency.txt",
|
||||
"calendar.txt",
|
||||
"calendar_dates.txt",
|
||||
"routes.txt",
|
||||
"stop_times.txt",
|
||||
"stops.txt",
|
||||
"trips.txt",
|
||||
}
|
||||
if not dest.exists() or not zipfile.is_zipfile(dest):
|
||||
raise RuntimeError(f"transxchange2gtfs did not create a valid GTFS zip: {dest}")
|
||||
with zipfile.ZipFile(dest) as z:
|
||||
missing = required_files - set(z.namelist())
|
||||
if missing:
|
||||
missing_str = ", ".join(sorted(missing))
|
||||
raise RuntimeError(f"TfL GTFS zip is missing required files: {missing_str}")
|
||||
size_mb = dest.stat().st_size / (1024 * 1024)
|
||||
print(f" Saved to {dest} ({size_mb:.1f} MB)")
|
||||
return dest
|
||||
|
|
|
|||
76
pipeline/download/transxchange2gtfs_shim.js
Normal file
76
pipeline/download/transxchange2gtfs_shim.js
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env node
|
||||
"use strict";
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { createRequire } = require("module");
|
||||
|
||||
// argv layout: <package-dir> followed by the arguments that are forwarded to
// the converter CLI unchanged.
const [pkgDirArg, ...converterArgs] = process.argv.slice(2);

// Need the package directory plus at least two converter arguments
// (input and output); otherwise print usage and exit with status 2.
if (!pkgDirArg || converterArgs.length < 2) {
  console.error(
    "Usage: transxchange2gtfs_shim.js <package-dir> <input...> <output>",
  );
  process.exit(2);
}

// Absolute path of the installed package; every patch target and the CLI
// entry point are resolved relative to it.
const pkgDir = path.resolve(pkgDirArg);
|
||||
|
||||
/**
 * Replace the first occurrence of `before` with `after` in a package file.
 *
 * The call is a no-op when `before` is absent but `after` is already present
 * (the file was patched earlier), which makes repeated runs safe.
 *
 * @param {string} relativePath - File path relative to `pkgDir`.
 * @param {string} before - Exact text expected in the unpatched file.
 * @param {string} after - Exact text to put in its place.
 * @throws {Error} When the file contains neither `before` nor `after`.
 */
function replaceOnce(relativePath, before, after) {
  const file = path.join(pkgDir, relativePath);
  const original = fs.readFileSync(file, "utf8");
  if (original.includes(before)) {
    // A function replacer inserts `after` verbatim. A plain string would have
    // `$&`, `$1`, `$$`, ... expanded by String.prototype.replace, so the text
    // written could differ from `after` and defeat the check below next run.
    fs.writeFileSync(file, original.replace(before, () => after));
    return;
  }
  if (!original.includes(after)) {
    throw new Error(`Could not patch ${relativePath}: expected text not found`);
  }
}
|
||||
|
||||
// The published 1.12.0 package has a few compatibility issues with current
|
||||
// TfL TransXChange exports:
|
||||
// - the bin script points at dist/src/cli.js, but the package ships dist/cli.js
|
||||
// - the compiled date-holidays import expects a synthetic default export
|
||||
// - some TfL journeys reference timing links without matching route-link geometry
|
||||
//
|
||||
// GTFS shapes are optional for R5 routing. Clear shape references and omit
|
||||
// shapes.txt so missing route geometry does not drop otherwise usable trips.
|
||||
/**
 * Apply every source patch to the installed package, in order.
 *
 * Each entry is handed to `replaceOnce`, so a patch that is already applied
 * is skipped and a file matching neither form raises an error.
 */
function patchPackage() {
  const patches = [
    {
      // Tolerate a journey whose timing link has no matching route link.
      file: "dist/transxchange/TransXChangeJourneyStream.js",
      before: "distanceSoFarM += routeLink.Distance;",
      after: "distanceSoFarM += routeLink ? routeLink.Distance : 0;",
    },
    {
      // Emit an empty string in place of the hashed route-link sequence.
      file: "dist/gtfs/TripsStream.js",
      before:
        "(0, crypto_1.createHash)('md5').update(JSON.stringify({ routeId: journey.route, routeLinkSeq: journey.routeLinkIds })).digest(\"hex\"));",
      after: "\"\");",
    },
    {
      // Emit an empty string in place of stop.shapeDistTraveled.
      file: "dist/gtfs/StopTimesStream.js",
      before: "stop.shapeDistTraveled, stop.exactTime ? \"1\" : \"0\");",
      after: "\"\", stop.exactTime ? \"1\" : \"0\");",
    },
    {
      // Remove the shapes.txt output stream.
      file: "dist/Container.js",
      before:
        "\"stops.txt\": transxchange.pipe(new StopsStream_1.StopsStream(naptanIndex)),\n \"shapes.txt\": journeyStream.pipe(new ShapesStream_1.ShapesStream())",
      after:
        "\"stops.txt\": transxchange.pipe(new StopsStream_1.StopsStream(naptanIndex))",
    },
    {
      // Remove the transfers.txt output stream.
      file: "dist/Container.js",
      before:
        "\"routes.txt\": transxchange.pipe(new RoutesStream_1.RoutesStream()),\n \"transfers.txt\": transxchange.pipe(new TransfersStream_1.TransfersStream(naptanIndex, locationIndex)),\n \"stops.txt\": transxchange.pipe(new StopsStream_1.StopsStream(naptanIndex))",
      after:
        "\"routes.txt\": transxchange.pipe(new RoutesStream_1.RoutesStream()),\n \"stops.txt\": transxchange.pipe(new StopsStream_1.StopsStream(naptanIndex))",
    },
  ];

  for (const { file, before, after } of patches) {
    replaceOnce(file, before, after);
  }
}
|
||||
|
||||
// Patch the package sources on disk before any of its modules are loaded.
patchPackage();

// Load date-holidays through the package's own module resolution and give it
// a `default` property when it has none, because the compiled import expects
// a synthetic default export.
const pkgRequire = createRequire(path.join(pkgDir, "package.json"));
const Holidays = pkgRequire("date-holidays");
if (!Holidays.default) {
  Holidays.default = Holidays;
}

// Drop the shim-only <package-dir> argument so the CLI sees just the
// converter arguments, then run the CLI entry point the package ships.
process.argv = [process.argv[0], "transxchange2gtfs", ...converterArgs];
require(path.join(pkgDir, "dist", "cli.js"));
|
||||
Loading…
Add table
Add a link
Reference in a new issue