Extract common download utils
This commit is contained in:
parent
6ddb3d2121
commit
3b9ad11d71
9 changed files with 152 additions and 161 deletions
40
pipeline/utils/download.py
Normal file
40
pipeline/utils/download.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""Shared download and extraction helpers for pipeline scripts."""
|
||||
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def download(url: str, output_path: Path, *, timeout: float = 120) -> None:
    """Stream-download a URL to a local file with a tqdm progress bar.

    The response body is written to a sibling ``<name>.part`` file and moved
    onto ``output_path`` only once the whole body has been received, so a
    failed or interrupted transfer never leaves a truncated file at
    ``output_path`` (and never clobbers a previously downloaded one).

    Args:
        url: Address fetched with a GET request; redirects are followed.
        output_path: Destination file. Its parent directory must already
            exist.
        timeout: Read timeout in seconds. The other httpx timeouts are fixed
            at 30 seconds.

    Raises:
        httpx.HTTPStatusError: If ``raise_for_status()`` rejects the response.
        OSError: If the partial file cannot be written or moved into place.
    """
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        with httpx.stream(
            "GET",
            url,
            follow_redirects=True,
            timeout=httpx.Timeout(30.0, read=timeout),
        ) as response:
            response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]

            # The header only feeds the progress bar, so a missing or
            # malformed value must not abort the download itself.
            try:
                total = int(response.headers.get("content-length", 0))
            except ValueError:
                total = 0

            with (
                open(partial_path, "wb") as f,
                tqdm(
                    total=total or None,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc=output_path.name,
                ) as pbar,
            ):
                # Content-Length counts bytes on the wire, while iter_bytes()
                # yields decoded content. Track the raw byte counter so the
                # bar stays consistent with its total for compressed bodies.
                received = response.num_bytes_downloaded
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
                    pbar.update(response.num_bytes_downloaded - received)
                    received = response.num_bytes_downloaded

        # The file handle is closed at this point, so the move is safe on
        # every platform.
        partial_path.replace(output_path)
    finally:
        # No-op after a successful move; removes the leftover on failure.
        partial_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def extract_zip(zip_path: Path, extract_dir: Path) -> None:
    """Unpack every member of a ZIP archive into ``extract_dir``.

    The destination directory, including any missing parents, is created
    before the archive is opened.

    Args:
        zip_path: Location of the ZIP file to read.
        extract_dir: Directory that receives the archive's contents.
    """
    extract_dir.mkdir(exist_ok=True, parents=True)
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(path=extract_dir)
|
||||
Loading…
Add table
Add a link
Reference in a new issue