40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
"""Shared download and extraction helpers for pipeline scripts."""
|
|
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
from tqdm import tqdm
|
|
|
|
|
|
def download(url: str, output_path: Path, *, timeout: float = 120) -> None:
    """Stream-download a URL to a local file with a tqdm progress bar.

    The response body is written to a sibling ``<name>.part`` file and moved
    onto ``output_path`` only once the whole body has been received, so a
    failed or interrupted transfer never leaves a truncated file at the
    destination.

    Args:
        url: Address fetched with a GET request; redirects are followed.
        output_path: Destination file. Replaced if it already exists.
        timeout: Read timeout in seconds. The connect, write and pool
            timeouts are fixed at 30 seconds.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        httpx.HTTPError: If the transfer fails at the transport level.
        OSError: If the destination cannot be written.
    """
    with httpx.stream(
        "GET",
        url,
        follow_redirects=True,
        timeout=httpx.Timeout(30.0, read=timeout),
    ) as response:
        response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]

        # The header only sizes the progress bar; a malformed value must not
        # abort an otherwise healthy download.
        try:
            total = int(response.headers.get("content-length", 0))
        except ValueError:
            total = 0

        partial_path = output_path.with_name(output_path.name + ".part")
        try:
            with (
                open(partial_path, "wb") as f,
                tqdm(
                    total=total or None,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc=output_path.name,
                ) as pbar,
            ):
                # Content-Length counts bytes on the wire, while iter_bytes()
                # yields *decoded* bytes (e.g. after gzip). Advance the bar by
                # the raw byte counter so it stays consistent with `total`.
                reported = 0
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
                    pbar.update(response.num_bytes_downloaded - reported)
                    reported = response.num_bytes_downloaded
            # Same-directory rename: the destination appears only when complete.
            partial_path.replace(output_path)
        except BaseException:
            # Clean up on any failure (including Ctrl-C), then re-raise.
            partial_path.unlink(missing_ok=True)
            raise
|
|
|
|
|
|
def extract_zip(zip_path: Path, extract_dir: Path) -> None:
    """Extract a ZIP archive into the given directory.

    Args:
        zip_path: Path of the ZIP archive to read.
        extract_dir: Directory receiving the archive contents. It is created,
            together with any missing parents, if it does not exist yet.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid ZIP archive.
    """
    extract_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        # ZipFile.extractall() takes no ``filter`` argument (that keyword
        # belongs to tarfile); passing it raises TypeError. zipfile already
        # strips absolute paths and ".." components from member names.
        zf.extractall(extract_dir)
|