Extract delete corrupt images

This commit is contained in:
Andras Schmelczer 2024-07-08 07:36:56 +01:00
parent 36cf0d3d35
commit 564134145a
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
3 changed files with 26 additions and 17 deletions

View file

@ -14,7 +14,6 @@ def get_data_loader(
paths=data,
edit_count=edit_count,
bin_count=bin_count,
delete_corrupt_images=False,
cache_path=CACHE_PATH,
),
batch_size=batch_size,

View file

@ -3,7 +3,6 @@ from typing import List, Optional, Tuple
from utils import compute_histogram
from operations.random_edit import random_edit
from PIL import Image
from tqdm import tqdm
import logging
import torch
from pathlib import Path
@ -21,7 +20,6 @@ class HistogramDataset(Dataset):
bin_count: int = 16,
edit_count: int = 12,
target_size=(240, 240),
delete_corrupt_images: bool = False,
cache_path: Optional[Path] = None,
):
self._paths = sorted(paths)
@ -37,20 +35,6 @@ class HistogramDataset(Dataset):
/ f"{self._bin_count}bins_{self._target_size[0]}x{self._target_size[1]}px"
)
if delete_corrupt_images:
self._delete_corrupt_images()
def _delete_corrupt_images(self) -> None:
    """Delete every file in ``self._paths`` that PIL cannot open.

    Each path is probed with ``Image.open``; when that raises an ordinary
    exception the file is removed from disk. Paths that are already
    missing are skipped. ``self._paths`` itself is not modified. The
    number of deleted files is logged once the scan finishes.

    NOTE(review): ``Image.open`` presumably inspects only the file header
    (see the Pillow docs), so damage further into the file would not be
    detected here -- confirm whether a full decode is wanted.
    """
    deleted_count = 0
    for path in tqdm(self._paths):
        try:
            # Open and immediately close: only the ability to open the
            # file matters, and the handle must not be leaked.
            with Image.open(path):
                pass
        except FileNotFoundError:
            # Nothing to delete; unlinking would raise and abort the scan.
            logging.warning(f"{path} does not exist, skipping...")
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of deleting a file that
            # merely happened to be open when the user pressed Ctrl-C.
            logging.warning(f"Failed to open {path}, deleting...")
            deleted_count += 1
            path.unlink()
    logging.info(f"Deleted {deleted_count} corrupt images")
def __len__(self):
return len(self._paths) * self._edit_count

View file

@ -0,0 +1,26 @@
import logging
from pathlib import Path
from typing import List
from PIL import Image
import PIL.Image
from tqdm import tqdm
PIL.Image.MAX_IMAGE_PIXELS = None
def delete_corrupt_images(paths: List[Path]) -> None:
    """Delete every file in ``paths`` that PIL cannot open.

    Paths that do not exist are skipped with a warning. Every other path
    is probed with ``Image.open``; when that raises an ordinary exception
    the file is removed from disk. The number of deleted files is logged
    once the scan finishes.

    NOTE(review): ``Image.open`` presumably inspects only the file header
    (see the Pillow docs), so damage further into the file would not be
    detected here -- confirm whether a full decode is wanted.

    Args:
        paths: Image files to check. The list itself is not modified.

    Raises:
        KeyboardInterrupt: Re-raised after logging, so a Ctrl-C stops the
            scan without deleting the file being probed.
    """
    deleted_count = 0
    for path in tqdm(paths):
        if not path.exists():
            logging.warning(f"{path} does not exist, skipping...")
            continue

        try:
            # Open and immediately close: only the ability to open the
            # file matters, and the handle must not be leaked.
            with Image.open(path):
                pass
        except KeyboardInterrupt:
            logging.info("Keyboard interrupt, exiting...")
            raise
        except Exception:
            # Deliberately not a bare ``except``: SystemExit and
            # GeneratorExit must propagate instead of deleting a file.
            logging.warning(f"Failed to open {path}, deleting...")
            deleted_count += 1
            path.unlink()

    logging.info(f"Deleted {deleted_count} corrupt images")