Extract delete corrupt images
This commit is contained in:
parent
36cf0d3d35
commit
564134145a
3 changed files with 26 additions and 17 deletions
|
|
@ -14,7 +14,6 @@ def get_data_loader(
|
|||
paths=data,
|
||||
edit_count=edit_count,
|
||||
bin_count=bin_count,
|
||||
delete_corrupt_images=False,
|
||||
cache_path=CACHE_PATH,
|
||||
),
|
||||
batch_size=batch_size,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ from typing import List, Optional, Tuple
|
|||
from utils import compute_histogram
|
||||
from operations.random_edit import random_edit
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
import logging
|
||||
import torch
|
||||
from pathlib import Path
|
||||
|
|
@ -21,7 +20,6 @@ class HistogramDataset(Dataset):
|
|||
bin_count: int = 16,
|
||||
edit_count: int = 12,
|
||||
target_size=(240, 240),
|
||||
delete_corrupt_images: bool = False,
|
||||
cache_path: Optional[Path] = None,
|
||||
):
|
||||
self._paths = sorted(paths)
|
||||
|
|
@ -37,20 +35,6 @@ class HistogramDataset(Dataset):
|
|||
/ f"{self._bin_count}bins_{self._target_size[0]}x{self._target_size[1]}px"
|
||||
)
|
||||
|
||||
if delete_corrupt_images:
|
||||
self._delete_corrupt_images()
|
||||
|
||||
def _delete_corrupt_images(self) -> None:
    """Delete every file in ``self._paths`` that Pillow fails to open.

    Each path is opened with ``Image.open``; if that raises an ordinary
    exception the file is logged and removed from disk. Paths that do not
    exist are logged and skipped. The number of deleted files is logged
    once the scan is finished.

    NOTE(review): ``self._paths`` is not pruned here, so deleted files stay
    listed after this call -- confirm whether later code expects that.
    """
    deleted_count = 0
    for path in tqdm(self._paths):
        # Nothing to open or unlink; unlinking a missing file would raise.
        if not path.exists():
            logging.warning(f"{path} does not exist, skipping...")
            continue
        try:
            # Use the image as a context manager so the underlying file
            # handle is released immediately instead of lingering.
            with Image.open(path):
                pass
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate rather than delete a healthy file.
            logging.warning(f"Failed to open {path}, deleting...")
            deleted_count += 1
            path.unlink()
    logging.info(f"Deleted {deleted_count} corrupt images")
|
||||
|
||||
def __len__(self):
    """Return the number of stored paths multiplied by the edit count."""
    path_total = len(self._paths)
    return path_total * self._edit_count
|
||||
|
||||
|
|
|
|||
26
src/utils/delete_corrupt_images.py
Normal file
26
src/utils/delete_corrupt_images.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from PIL import Image
|
||||
import PIL.Image
|
||||
from tqdm import tqdm
|
||||
|
||||
PIL.Image.MAX_IMAGE_PIXELS = None
|
||||
|
||||
|
||||
def delete_corrupt_images(paths: List[Path]) -> None:
    """Delete every file in *paths* that Pillow fails to open.

    Paths that do not exist are logged and skipped. Every other path is
    opened with ``Image.open``; if that raises an ordinary exception the
    file is logged and removed from disk. The number of deleted files is
    logged once the scan is finished.

    Args:
        paths: Image file paths to check.

    Raises:
        KeyboardInterrupt: Re-raised (after logging) when the scan is
            interrupted; the file being checked at that moment is kept.

    NOTE(review): ``Image.open`` only has to succeed for a file to be kept;
    no further validation (e.g. ``Image.verify()``) is run. Tightening the
    check would delete more files, so it is left for a deliberate decision.
    """
    deleted_count = 0
    for path in tqdm(paths):
        if not path.exists():
            logging.warning(f"{path} does not exist, skipping...")
            continue
        try:
            # Use the image as a context manager so the underlying file
            # handle is released immediately instead of lingering.
            with Image.open(path):
                pass
        except KeyboardInterrupt:
            logging.info("Keyboard interrupt, exiting...")
            raise
        except Exception:
            # Deliberately not a bare ``except``: SystemExit and friends
            # must propagate rather than trigger deletion of a file.
            logging.warning(f"Failed to open {path}, deleting...")
            deleted_count += 1
            path.unlink()
    logging.info(f"Deleted {deleted_count} corrupt images")
|
||||
Loading…
Add table
Add a link
Reference in a new issue