"""Shared download and extraction helpers for pipeline scripts."""

import zipfile
from pathlib import Path

import httpx
from tqdm import tqdm


def download(url: str, output_path: Path, *, timeout: float = 120) -> None:
    """Stream-download a URL to a local file with a tqdm progress bar.

    The body is first written to a sibling ``<name>.part`` file and only
    renamed to ``output_path`` once the transfer has completed, so a failed
    or interrupted download never leaves a truncated file at ``output_path``
    (and never clobbers an existing good file).

    Args:
        url: Address to fetch with a GET request; redirects are followed.
        output_path: Destination file. Its parent directory must exist.
        timeout: Read timeout in seconds passed to ``httpx.Timeout``; the
            other timeout phases use the 30 second default given there.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        httpx.HTTPError: On transport-level failures (timeouts, resets, ...).
        OSError: If the destination cannot be written.
    """
    partial_path = output_path.with_name(output_path.name + ".part")

    with httpx.stream(
        "GET",
        url,
        follow_redirects=True,
        timeout=httpx.Timeout(30.0, read=timeout),
    ) as response:
        response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]

        # The length only drives the progress bar, so a missing or malformed
        # header must not abort the download; fall back to "unknown".
        try:
            total = int(response.headers.get("content-length", 0))
        except ValueError:
            total = 0

        try:
            with (
                open(partial_path, "wb") as f,
                tqdm(
                    total=total or None,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc=output_path.name,
                ) as pbar,
            ):
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
                    pbar.update(len(chunk))
        except BaseException:
            # Also covers KeyboardInterrupt: drop the incomplete file so it
            # cannot be mistaken for a finished download, then re-raise.
            partial_path.unlink(missing_ok=True)
            raise

    # Publish the finished file under its final name in one rename step.
    _ = partial_path.replace(output_path)


def extract_zip(zip_path: Path, extract_dir: Path) -> None:
    """Extract a ZIP archive into the given directory.

    Args:
        zip_path: Path to the ``.zip`` file to read.
        extract_dir: Target directory; created (with parents) if missing.

    Raises:
        zipfile.BadZipFile: If ``zip_path`` is not a valid ZIP archive.
        OSError: If the archive cannot be read or members cannot be written.
    """
    extract_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        # ZipFile.extractall() has no ``filter`` parameter (that is the
        # tarfile API); passing one raises TypeError. zipfile already strips
        # absolute paths and ".." components from member names on extraction.
        zf.extractall(extract_dir)