From 564134145a0f32f3d5dd490ea79db801822f26c1 Mon Sep 17 00:00:00 2001
From: Andras Schmelczer
Date: Mon, 8 Jul 2024 07:36:56 +0100
Subject: [PATCH] Extract delete corrupt images

---
Review notes (this section is ignored by git am / git apply):
* Moves the corrupt-image cleanup out of HistogramDataset into a standalone
  helper. HistogramDataset no longer accepts delete_corrupt_images=...;
  nothing in this diff calls the new helper, so callers have to invoke it
  explicitly.
* Compared with the removed method, the helper skips missing paths, logs and
  re-raises KeyboardInterrupt, closes the image handle after opening, and
  catches Exception instead of using a bare except.
* Context-line indentation in the two src/training hunks was reconstructed
  from a whitespace-collapsed copy of this patch; run `git apply --check`
  and correct the indentation if a hunk is rejected.

 src/training/get_data_loader.py    |  1 -
 src/training/histogram_dataset.py  | 16 ----------------
 src/utils/delete_corrupt_images.py | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 17 deletions(-)
 create mode 100644 src/utils/delete_corrupt_images.py

diff --git a/src/training/get_data_loader.py b/src/training/get_data_loader.py
index b6735a9..0af7bce 100644
--- a/src/training/get_data_loader.py
+++ b/src/training/get_data_loader.py
@@ -14,7 +14,6 @@ def get_data_loader(
             paths=data,
             edit_count=edit_count,
             bin_count=bin_count,
-            delete_corrupt_images=False,
             cache_path=CACHE_PATH,
         ),
         batch_size=batch_size,
diff --git a/src/training/histogram_dataset.py b/src/training/histogram_dataset.py
index 306112f..d51b005 100644
--- a/src/training/histogram_dataset.py
+++ b/src/training/histogram_dataset.py
@@ -3,7 +3,6 @@ from typing import List, Optional, Tuple
 from utils import compute_histogram
 from operations.random_edit import random_edit
 from PIL import Image
-from tqdm import tqdm
 import logging
 import torch
 from pathlib import Path
@@ -21,7 +20,6 @@ class HistogramDataset(Dataset):
         bin_count: int = 16,
         edit_count: int = 12,
         target_size=(240, 240),
-        delete_corrupt_images: bool = False,
         cache_path: Optional[Path] = None,
     ):
         self._paths = sorted(paths)
@@ -37,20 +35,6 @@ class HistogramDataset(Dataset):
             / f"{self._bin_count}bins_{self._target_size[0]}x{self._target_size[1]}px"
         )
 
-        if delete_corrupt_images:
-            self._delete_corrupt_images()
-
-    def _delete_corrupt_images(self) -> None:
-        deleted_count = 0
-        for path in tqdm(self._paths):
-            try:
-                Image.open(path)
-            except:
-                logging.warning(f"Failed to open {path}, deleting...")
-                deleted_count += 1
-                path.unlink()
-        logging.info(f"Deleted {deleted_count} corrupt images")
-
     def __len__(self):
         return len(self._paths) * self._edit_count
 
diff --git a/src/utils/delete_corrupt_images.py b/src/utils/delete_corrupt_images.py
new file mode 100644
index 0000000..15bbfee
--- /dev/null
+++ b/src/utils/delete_corrupt_images.py
@@ -0,0 +1,35 @@
+import logging
+from pathlib import Path
+from typing import List
+from PIL import Image
+import PIL.Image
+from tqdm import tqdm
+
+# Lift Pillow's decompression-bomb pixel limit so that very large but valid
+# images are not rejected by Image.open() and then deleted below.
+PIL.Image.MAX_IMAGE_PIXELS = None
+
+
+def delete_corrupt_images(paths: List[Path]) -> None:
+    """Delete every file in *paths* that Pillow cannot open.
+
+    Paths that do not exist are skipped with a warning. Image.open() only
+    parses the file header, so truncated pixel data is not detected.
+    """
+    deleted_count = 0
+    for path in tqdm(paths):
+        if not path.exists():
+            logging.warning(f"{path} does not exist, skipping...")
+            continue
+        try:
+            # Close the handle right away; only the open itself is the check.
+            Image.open(path).close()
+        except KeyboardInterrupt:
+            logging.info("Keyboard interrupt, exiting...")
+            raise
+        except Exception:
+            # Not a bare except: SystemExit must never cause a deletion.
+            logging.warning(f"Failed to open {path}, deleting...")
+            deleted_count += 1
+            path.unlink()
+    logging.info(f"Deleted {deleted_count} corrupt images")