Progress
This commit is contained in:
parent
5b68c8da04
commit
536fd14378
28 changed files with 1683 additions and 313 deletions
89
pipeline/download/tiles.py
Normal file
89
pipeline/download/tiles.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
"""Download UK PMTiles extract from the latest Protomaps daily build."""
|
||||
|
||||
import argparse
|
||||
import platform
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
PROTOMAPS_BASE = "https://build.protomaps.com"
|
||||
UK_BBOX = "-10.5,49.5,2.5,61"
|
||||
MAX_AGE_DAYS = 14
|
||||
|
||||
|
||||
def find_latest_build() -> str:
    """Find the most recent available Protomaps daily build.

    Probes ``{PROTOMAPS_BASE}/YYYYMMDD.pmtiles`` with HTTP HEAD requests,
    starting at today's UTC date and stepping back one day at a time for
    ``MAX_AGE_DAYS`` candidates.

    Returns:
        The URL of the first (newest) candidate that answers the HEAD
        request without an HTTP error.

    Raises:
        SystemExit: With status 1, after printing a message to stderr, when
            none of the candidates exists.
        urllib.error.URLError: Network-level failures (DNS, refused
            connection, timeout) are not swallowed and propagate to the
            caller; only HTTP error responses are treated as "no build".
    """
    # Function-scope imports: the module only imports ``urllib.request`` and
    # ``datetime``/``timedelta`` at file level.
    from datetime import timezone
    from urllib.error import HTTPError

    probe_timeout_s = 30  # keep a stalled server from hanging the pipeline

    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``.
    today = datetime.now(timezone.utc).date()
    for age in range(MAX_AGE_DAYS):
        day = today - timedelta(days=age)
        url = f"{PROTOMAPS_BASE}/{day:%Y%m%d}.pmtiles"
        request = urllib.request.Request(url, method="HEAD")
        try:
            # The context manager closes the connection; only the status of
            # the response matters here, not its (empty) body.
            with urllib.request.urlopen(request, timeout=probe_timeout_s):
                pass
        except HTTPError:
            # Build for this day is missing (or not accessible); try an
            # older one.
            continue
        print(f"Found build: {day:%Y%m%d}")
        return url

    print(
        f"ERROR: No Protomaps build found in the last {MAX_AGE_DAYS} days",
        file=sys.stderr,
    )
    sys.exit(1)
|
||||
|
||||
|
||||
def ensure_pmtiles_cli(bin_path: Path, version: str) -> None:
    """Download the pmtiles CLI if not already present.

    When ``bin_path`` already exists nothing is downloaded. Otherwise the
    Linux release tarball of go-pmtiles ``version`` is fetched from GitHub,
    its ``pmtiles`` member is extracted in memory, and the result is written
    to ``bin_path`` with the owner-execute bit set.

    Args:
        bin_path: Where the ``pmtiles`` executable should live. Missing
            parent directories are created.
        version: go-pmtiles release version without the leading ``v``.

    Raises:
        urllib.error.URLError: If the release archive cannot be downloaded.
        KeyError: If the archive has no ``pmtiles`` member.
        RuntimeError: If the ``pmtiles`` member is not a regular file.
    """
    if bin_path.exists():
        return

    # Translate ``platform.machine()`` values into the architecture suffix
    # used in the release asset name; unknown values are passed through.
    machine = platform.machine()
    arch = {"x86_64": "x86_64", "aarch64": "arm64"}.get(machine, machine)
    url = (
        "https://github.com/protomaps/go-pmtiles/releases/download/"
        f"v{version}/go-pmtiles_{version}_Linux_{arch}.tar.gz"
    )
    print(f"Downloading pmtiles CLI v{version}...")
    # Close the HTTP response deterministically and bound each socket
    # operation so a stalled download cannot hang forever.
    with urllib.request.urlopen(url, timeout=60) as response:
        archive = response.read()

    with tarfile.open(fileobj=BytesIO(archive), mode="r:gz") as tar:
        extracted = tar.extractfile(tar.getmember("pmtiles"))
        # ``extractfile`` returns None for non-file members. Raise instead
        # of ``assert`` so the check survives ``python -O``.
        if extracted is None:
            raise RuntimeError(
                f"'pmtiles' in {url} is not a regular file; cannot install CLI"
            )
        with extracted:
            binary = extracted.read()

    bin_path.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename into place: an interrupted
    # write must not leave a truncated ``bin_path`` behind, because the
    # ``exists()`` shortcut above would then accept it on the next run.
    partial_path = bin_path.with_name(bin_path.name + ".part")
    partial_path.write_bytes(binary)
    partial_path.chmod(partial_path.stat().st_mode | stat.S_IEXEC)
    partial_path.replace(bin_path)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: fetch the CLI, locate a build, extract UK tiles.

    Reads ``--output`` (required) and ``--pmtiles-version`` from the command
    line, makes sure a ``pmtiles`` binary exists next to the output file,
    picks the latest Protomaps build and runs ``pmtiles extract`` restricted
    to ``UK_BBOX``. A non-zero exit status from the subprocess raises
    ``subprocess.CalledProcessError``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--output", type=Path, required=True, help="Output .pmtiles path")
    cli.add_argument(
        "--pmtiles-version", default="1.22.3", help="go-pmtiles release version"
    )
    opts = cli.parse_args()
    output_path = opts.output

    # The CLI binary is kept in the same directory as the output file.
    pmtiles_bin = output_path.parent / "pmtiles"
    ensure_pmtiles_cli(pmtiles_bin, opts.pmtiles_version)

    build_url = find_latest_build()
    print(f"Extracting UK tiles from {build_url}...")

    command = [
        str(pmtiles_bin),
        "extract",
        build_url,
        str(output_path),
        f"--bbox={UK_BBOX}",
    ]
    subprocess.run(command, check=True)

    # Report the size of the extract in mebibytes.
    bytes_per_mb = 1024 * 1024
    size_mb = output_path.stat().st_size / bytes_per_mb
    print(f"Wrote {output_path} ({size_mb:.1f} MB)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -55,12 +55,16 @@ def _build_wide(
|
|||
)
|
||||
)
|
||||
|
||||
arcgis = pl.scan_parquet(arcgis_path).select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
"lat",
|
||||
pl.col("long").alias("lon"),
|
||||
"lsoa21",
|
||||
"oa21",
|
||||
arcgis = (
|
||||
pl.scan_parquet(arcgis_path)
|
||||
.filter(pl.col("ctry") == "E92000001") # England only
|
||||
.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
"lat",
|
||||
pl.col("long").alias("lon"),
|
||||
"lsoa21",
|
||||
"oa21",
|
||||
)
|
||||
)
|
||||
wide = wide.join(arcgis, on="postcode", how="full", coalesce=True)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue