move files
This commit is contained in:
parent
1a41fd6829
commit
231e22cac8
36 changed files with 15580 additions and 79653 deletions
66
src/colour_lut.ipynb
Normal file
66
src/colour_lut.ipynb
Normal file
File diff suppressed because one or more lines are too long
9
src/config.py
Normal file
9
src/config.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pathlib import Path
|
||||
|
||||
# Every ``*.jpg`` directly inside the unsplash data directory, in sorted order.
DATA = sorted(Path("/mnt/wsl/PHYSICALDRIVE1/data/unsplash").glob("*.jpg"))

# Output locations; both directories are created at import time when missing.
CACHE_PATH = Path("/mnt/wsl/PHYSICALDRIVE1/data/cache2")
MODELS_PATH = Path("models")

CACHE_PATH.mkdir(parents=True, exist_ok=True)
MODELS_PATH.mkdir(parents=True, exist_ok=True)
|
||||
59
src/create_edits.ipynb
Normal file
59
src/create_edits.ipynb
Normal file
File diff suppressed because one or more lines are too long
0
src/editor/__init__.py
Normal file
0
src/editor/__init__.py
Normal file
3
src/editor/histogram_transfer/__init__.py
Normal file
3
src/editor/histogram_transfer/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .regrain import regrain
|
||||
from .pdf_transfer_1d import pdf_transfer_1d
|
||||
from .pdf_transfer_3d import pdf_transfer_3d
|
||||
13
src/editor/histogram_transfer/pdf_transfer_1d.py
Normal file
13
src/editor/histogram_transfer/pdf_transfer_1d.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def pdf_transfer_1d(pX: np.ndarray, pY: np.ndarray) -> np.ndarray:
    """Build the 1D transfer function that maps histogram ``pX`` onto ``pY``.

    Both histograms are turned into normalised cumulative distributions (a
    machine-epsilon offset is added to every bin before accumulating), then
    the cumulative values of ``pX`` are looked up in the cumulative
    distribution of ``pY`` by linear interpolation.

    Returns:
        An array with one entry per bin of ``pX``; each entry is a
        (possibly fractional) bin position in the range ``0 .. len(pX) - 1``.
    """
    tiny = np.finfo(float).eps

    def _normalised_cdf(pdf):
        cdf = np.cumsum(pdf + tiny)
        return cdf / cdf[-1]

    source_cdf = _normalised_cdf(pX)
    target_cdf = _normalised_cdf(pY)
    bin_positions = np.arange(len(pX))
    return np.interp(source_cdf, target_cdf, bin_positions)
|
||||
46
src/editor/histogram_transfer/pdf_transfer_3d.py
Normal file
46
src/editor/histogram_transfer/pdf_transfer_3d.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
import numpy as np
|
||||
from editor.utils import generate_rotation_matrices
|
||||
from editor.histogram_transfer import pdf_transfer_1d
|
||||
from editor.histogram_transfer import regrain
|
||||
|
||||
|
||||
# Margin added on both sides of the histogram range so that the extreme
# samples fall strictly inside the outermost bins.
EPSILON = 1e-6


def pdf_transfer_3d(
    source: np.ndarray,
    target_flattened: np.ndarray,
    relaxation: float = 1,
    bin_count: int = 1000,
    iterations: int = 25,
    smoothness: float = 1,
):
    """Transfer the colour distribution of ``target_flattened`` onto ``source``.

    For every rotation returned by ``generate_rotation_matrices(iterations)``
    both point sets are rotated, each rotated axis is histogram-matched in 1D
    with ``pdf_transfer_1d``, and the source points are moved towards the
    matched positions (scaled by ``relaxation``). After every rotation the
    points are clipped to ``[0, 255]``. The final result is cast to ``uint8``
    and passed through ``regrain`` together with the original ``source``.

    Args:
        source: Image array of shape ``(h, w, c)``. The rotations are 3x3, so
            ``c`` must be 3. Values are presumably in 0..255 (the result is
            clipped to that range) - confirm against callers.
        target_flattened: Reference samples, multiplied as
            ``rotation @ target_flattened``, i.e. channels on the first axis.
        relaxation: Fraction of the matched displacement applied per rotation.
        bin_count: Number of histogram edges per axis.
        iterations: Number of rotations to process.
        smoothness: Forwarded to ``regrain``.

    Returns:
        The value returned by ``regrain`` for the recoloured image.
    """
    h, w, c = source.shape
    # Work in floating point from the start. Previously the per-rotation
    # output buffer inherited the dtype of ``source``; for a uint8 image the
    # matched values of the first rotation (which can be negative or
    # fractional after rotating) were silently truncated/wrapped into uint8.
    source_flattened = source.reshape(-1, c).transpose().astype(np.float64)

    for rotation in generate_rotation_matrices(iterations):
        rotated_source = rotation @ source_flattened
        rotated_target = rotation @ target_flattened
        matched = np.zeros_like(rotated_source)

        for axis in range(rotation.shape[0]):
            source_axis = rotated_source[axis, :]
            target_axis = rotated_target[axis, :]

            datamin = min(np.min(source_axis), np.min(target_axis)) - EPSILON
            datamax = max(np.max(source_axis), np.max(target_axis)) + EPSILON
            edges = np.linspace(datamin, datamax, bin_count)

            source_pdf, _ = np.histogram(source_axis, bins=edges, density=True)
            target_pdf, _ = np.histogram(target_axis, bins=edges, density=True)

            # ``transfer`` is expressed in bin positions; convert it back to
            # data units after interpolating at the source sample positions.
            transfer = pdf_transfer_1d(source_pdf, target_pdf)
            matched[axis, :] = (
                np.interp(source_axis, edges[:-1], transfer)
                * (datamax - datamin)
                / (bin_count - 1)
                + datamin
            )

        # Rotate the displacement back into the original colour axes.
        source_flattened = source_flattened + relaxation * (
            rotation.T @ (matched - rotated_source)
        )
        source_flattened.clip(0, 255, out=source_flattened)

    result = source_flattened.astype(np.uint8).transpose().reshape(h, w, c)
    return regrain(source, result, smoothness=smoothness)
|
||||
87
src/editor/histogram_transfer/regrain.py
Normal file
87
src/editor/histogram_transfer/regrain.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
from scipy.ndimage import zoom
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Solver iteration counts, one per resolution level; the first entry is used
# at full resolution and each following entry at the next coarser level.
NBITS = [4, 16, 32, 64, 64, 64]


def regrain(img_arr_in, img_arr_col, smoothness: float = 1):
    """Keep the gradient of ``img_arr_in`` and the colour of ``img_arr_col``.

    Both inputs are divided by 255 before solving; the solution is clamped to
    ``[0, 1]``, scaled back by 255 and returned as a ``uint8`` array.
    """
    reference = img_arr_in / 255.0
    recoloured = img_arr_col / 255.0

    # The solver starts from a copy of the normalised reference image.
    solution = _regrain_rec(
        np.array(reference), reference, recoloured, NBITS, 0, smoothness
    )
    solution = np.clip(solution, 0, 1)
    return (255.0 * solution).astype("uint8")
|
||||
|
||||
|
||||
def _regrain_rec(img_arr_out, img_arr_in, img_arr_col, nbits, level, smoothness):
    """Solve one resolution level, first recursing into a half-size level.

    The recursion continues while more than one entry is left in ``nbits``
    and both halved dimensions exceed 20 pixels. The coarse solution is
    resized back to this level's size and used as the starting point for
    ``_solve``, which runs ``nbits[0]`` iterations.
    """
    height, width, _channels = img_arr_in.shape
    half_height = (height + 1) // 2
    half_width = (width + 1) // 2

    can_recurse = len(nbits) > 1 and half_height > 20 and half_width > 20
    if can_recurse:
        coarse_in, coarse_col, coarse_out = (
            _resize_image(array, half_width, half_height)
            for array in (img_arr_in, img_arr_col, img_arr_out)
        )
        coarse_out = _regrain_rec(
            coarse_out, coarse_in, coarse_col, nbits[1:], level + 1, smoothness
        )
        img_arr_out = _resize_image(coarse_out, width, height)

    return _solve(img_arr_out, img_arr_in, img_arr_col, nbits[0], level, smoothness)
|
||||
|
||||
|
||||
def _solve(img_arr_out, img_arr_in, img_arr_col, nbit, level, smoothness):
|
||||
[width, height, c] = img_arr_in.shape
|
||||
first_pad_0 = lambda arr: np.concatenate((arr[:1, :], arr[:-1, :]), axis=0)
|
||||
first_pad_1 = lambda arr: np.concatenate((arr[:, :1], arr[:, :-1]), axis=1)
|
||||
last_pad_0 = lambda arr: np.concatenate((arr[1:, :], arr[-1:, :]), axis=0)
|
||||
last_pad_1 = lambda arr: np.concatenate((arr[:, 1:], arr[:, -1:]), axis=1)
|
||||
|
||||
delta_x = last_pad_1(img_arr_in) - first_pad_1(img_arr_in)
|
||||
delta_y = last_pad_0(img_arr_in) - first_pad_0(img_arr_in)
|
||||
delta = np.sqrt((delta_x**2 + delta_y**2).sum(axis=2, keepdims=True))
|
||||
|
||||
psi = 256 * delta / 5
|
||||
psi[psi > 1] = 1
|
||||
phi = 30 * 2 ** (-level) / (1 + 10 * delta / smoothness)
|
||||
|
||||
phi1 = (last_pad_1(phi) + phi) / 2
|
||||
phi2 = (last_pad_0(phi) + phi) / 2
|
||||
phi3 = (first_pad_1(phi) + phi) / 2
|
||||
phi4 = (first_pad_0(phi) + phi) / 2
|
||||
|
||||
rho = 1 / 5.0
|
||||
for i in range(nbit):
|
||||
den = psi + phi1 + phi2 + phi3 + phi4
|
||||
num = (
|
||||
np.tile(psi, [1, 1, c]) * img_arr_col
|
||||
+ np.tile(phi1, [1, 1, c])
|
||||
* (last_pad_1(img_arr_out) - last_pad_1(img_arr_in) + img_arr_in)
|
||||
+ np.tile(phi2, [1, 1, c])
|
||||
* (last_pad_0(img_arr_out) - last_pad_0(img_arr_in) + img_arr_in)
|
||||
+ np.tile(phi3, [1, 1, c])
|
||||
* (first_pad_1(img_arr_out) - first_pad_1(img_arr_in) + img_arr_in)
|
||||
+ np.tile(phi4, [1, 1, c])
|
||||
* (first_pad_0(img_arr_out) - first_pad_0(img_arr_in) + img_arr_in)
|
||||
)
|
||||
img_arr_out = (
|
||||
num / np.tile(den + 1e-6, [1, 1, c]) * (1 - rho) + rho * img_arr_out
|
||||
)
|
||||
return img_arr_out
|
||||
|
||||
|
||||
def _resize_image(data, target_width, target_height):
|
||||
return zoom(data, (target_height / data.shape[0], target_width / data.shape[1], 1))
|
||||
0
src/editor/image_editor.py
Normal file
0
src/editor/image_editor.py
Normal file
3
src/editor/operations/__init__.py
Normal file
3
src/editor/operations/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .add_noise import add_noise
|
||||
from .change_temperature import change_temperature
|
||||
from .add_random_colour_spill import add_random_colour_spill
|
||||
11
src/editor/operations/add_noise.py
Normal file
11
src/editor/operations/add_noise.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def add_noise(img: Image, alpha: float) -> Image:
    """Blend ``img`` with uniformly random RGB noise.

    The image is converted to RGB, a noise image of the same size is drawn
    with ``np.random.randint`` and both are combined with ``Image.blend``
    using ``alpha`` as the blend factor.
    """
    rgb_image = img.convert("RGB")
    columns, rows = rgb_image.size
    noise_pixels = np.random.randint(0, 256, (rows, columns, 3), dtype=np.uint8)
    noise_image = Image.fromarray(noise_pixels, mode="RGB")
    return Image.blend(rgb_image, noise_image, alpha)
|
||||
20
src/editor/operations/add_random_colour_spill.py
Normal file
20
src/editor/operations/add_random_colour_spill.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from PIL import Image
|
||||
from ..utils import random
|
||||
|
||||
|
||||
def add_random_colour_spill(image: Image, range: float) -> Image:
    """Scale the R, G and B channels by three independent random gains.

    Each gain is drawn with ``random(1 / range, range)`` (red first, then
    green, then blue) and placed on the diagonal of the 12-value colour
    matrix handed to ``image.convert("RGB", matrix)``.
    """
    lowest_gain = 1 / range
    red_gain = random(lowest_gain, range)
    green_gain = random(lowest_gain, range)
    blue_gain = random(lowest_gain, range)

    # Three rows of four values; the fourth value of each row stays 0.
    matrix = (
        red_gain, 0.0, 0.0, 0.0,
        0.0, green_gain, 0.0, 0.0,
        0.0, 0.0, blue_gain, 0.0,
    )
    return image.convert("RGB", matrix)
|
||||
42
src/editor/operations/change_temperature.py
Normal file
42
src/editor/operations/change_temperature.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from PIL import Image
|
||||
|
||||
# RGB triple for each supported temperature key, 1000 to 10000 in steps of 500.
kelvin_table = {
    1000: (255, 56, 0),
    1500: (255, 109, 0),
    2000: (255, 137, 18),
    2500: (255, 161, 72),
    3000: (255, 180, 107),
    3500: (255, 196, 137),
    4000: (255, 209, 163),
    4500: (255, 219, 186),
    5000: (255, 228, 206),
    5500: (255, 236, 224),
    6000: (255, 243, 239),
    6500: (255, 249, 253),
    7000: (245, 243, 255),
    7500: (235, 238, 255),
    8000: (227, 233, 255),
    8500: (220, 229, 255),
    9000: (214, 225, 255),
    9500: (208, 222, 255),
    10000: (204, 219, 255),
}


def change_temperature(image: Image, temperature: float) -> Image:
    """Tint ``image`` with the ``kelvin_table`` colour for ``temperature``.

    ``temperature`` must be one of the keys of ``kelvin_table``; any other
    value raises ``KeyError``. The looked-up colour, divided by 255, becomes
    the per-channel gain on the diagonal of the RGB conversion matrix.
    """
    red_gain, green_gain, blue_gain = (
        channel / 255.0 for channel in kelvin_table[temperature]
    )
    matrix = (
        red_gain, 0.0, 0.0, 0.0,
        0.0, green_gain, 0.0, 0.0,
        0.0, 0.0, blue_gain, 0.0,
    )
    return image.convert("RGB", matrix)
|
||||
3
src/editor/training/__init__.py
Normal file
3
src/editor/training/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .histogram_dataset import HistogramDataset
|
||||
from .random_edit import random_edit
|
||||
from .progressive_pooling_loss import ProgressivePoolingLoss
|
||||
89
src/editor/training/histogram_dataset.py
Normal file
89
src/editor/training/histogram_dataset.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
from torch.utils.data import Dataset
|
||||
from typing import List, Optional, Tuple
|
||||
from editor.utils import compute_histogram
|
||||
from .random_edit import random_edit
|
||||
from PIL import Image
|
||||
from tqdm import tqdm
|
||||
import torch
|
||||
from pathlib import Path
|
||||
|
||||
import PIL.Image
|
||||
|
||||
# Removes Pillow's pixel-count limit so that very large images can be opened.
PIL.Image.MAX_IMAGE_PIXELS = None


class HistogramDataset(Dataset):
    """Dataset of (edited histogram, original histogram) tensor pairs.

    Every source image contributes ``edit_count`` items. Item ``idx`` uses the
    image at ``idx // edit_count`` and a random edit seeded with ``idx``.
    When ``cache_path`` is given, each item is stored as ``<idx>.pt`` in that
    directory and loaded from there on later accesses.
    """

    def __init__(
        self,
        paths: List[Path],
        edit_count: int = 5,
        bin_count: int = 32,
        target_size=(480, 480),
        delete_corrupt_images: bool = False,
        cache_path: Optional[Path] = None,
    ):
        """Store the configuration and optionally purge unreadable images.

        Args:
            paths: Image files; they are kept in sorted order.
            edit_count: Number of random edits generated per image.
            bin_count: Histogram bins per colour axis.
            target_size: Maximum size passed to ``Image.thumbnail``.
            delete_corrupt_images: When true, every path that cannot be
                opened is deleted from disk and dropped from the dataset.
            cache_path: Directory for cached items, or ``None`` to disable.
        """
        self._paths = sorted(paths)
        self._edit_count = edit_count
        self._bin_count = bin_count
        self._target_size = target_size
        self._cache_path = cache_path

        if delete_corrupt_images:
            self._delete_corrupt_images()

    def _delete_corrupt_images(self) -> None:
        """Delete files Pillow cannot open and forget their paths."""
        readable_paths = []
        deleted_count = 0
        for path in tqdm(self._paths):
            try:
                # The context manager closes the file handle again.
                with Image.open(path):
                    pass
            except Exception:
                # ``Exception`` (not a bare ``except``) so that Ctrl-C or
                # SystemExit during the scan never deletes a valid image.
                print(f"Failed to open {path}, deleting...")
                deleted_count += 1
                path.unlink()
            else:
                readable_paths.append(path)
        # Deleted files must not stay addressable through __len__/__getitem__.
        self._paths = readable_paths
        print(f"Deleted {deleted_count} corrupt images")

    def __len__(self):
        """Return the number of (image, edit) combinations."""
        return len(self._paths) * self._edit_count

    def get_original_image(self, original_idx: int) -> Image.Image:
        """Open image ``original_idx`` and shrink it to fit ``target_size``."""
        original_path = self._paths[original_idx]
        original = Image.open(original_path)
        # NOTE(review): the image keeps its native mode, while
        # compute_histogram reshapes pixels to (-1, 3); non-3-channel files
        # are presumably not expected here - confirm.
        original.thumbnail(
            self._target_size, Image.Resampling.LANCZOS
        )  # size will be at most target_size, the aspect ratio is preserved
        return original

    def get_edited_image(self, original_idx: int, edit_idx: int) -> Image.Image:
        """Return image ``original_idx`` after ``random_edit`` seeded with ``edit_idx``."""
        original_image = self.get_original_image(original_idx)
        return random_edit(original_image, seed=edit_idx)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(edited_histogram, original_histogram)`` for item ``idx``.

        Both tensors are float histograms with an extra leading dimension
        added by ``unsqueeze(0)``.
        """
        # Kept local rather than on ``self`` so no per-item state is shared.
        cached_data_path = None
        if self._cache_path is not None:
            cached_data_path = self._cache_path / f"{idx}.pt"
            if cached_data_path.exists():
                try:
                    return torch.load(cached_data_path)
                except Exception:
                    # Unreadable cache entry: fall through and rebuild it.
                    print(f"Failed to load {cached_data_path}, regenerating...")

        original_idx = idx // self._edit_count
        original = self.get_original_image(original_idx)
        edited = random_edit(original, seed=idx)

        edited_histogram = compute_histogram(
            edited, bins=self._bin_count, normalize=True
        )
        original_histogram = compute_histogram(
            original, bins=self._bin_count, normalize=True
        )

        result = (
            torch.tensor(edited_histogram, dtype=torch.float).unsqueeze(0),
            torch.tensor(original_histogram, dtype=torch.float).unsqueeze(0),
        )

        if cached_data_path is not None:
            torch.save(result, cached_data_path)

        return result
|
||||
38
src/editor/training/progressive_pooling_loss.py
Normal file
38
src/editor/training/progressive_pooling_loss.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
from typing import List
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class ProgressivePoolingLoss(nn.Module):
    """Mean absolute difference of two cubes, summed over several poolings.

    For each entry of ``target_sizes`` both inputs are pooled with a kernel of
    ``depth // target_size`` (the average is multiplied by the kernel volume,
    giving per-cell sums) and the mean absolute difference is accumulated.
    The first term has weight 1; every later weight is the previous one
    multiplied by ``damping``.
    """

    def __init__(self, target_sizes: List[int], damping: float):
        super().__init__()
        self._target_sizes = target_sizes
        self._damping = damping

    def forward(self, tensor_a, tensor_b):
        """Return the weighted loss for two equally sized 5D cube tensors."""
        assert (
            tensor_a.size() == tensor_b.size()
        ), f"Input tensors must have the same size, got {tensor_a.size()} and {tensor_b.size()}"
        assert (
            tensor_a.dim() == 5
        ), f"Input tensors must have 5 dimensions, got {tensor_a.size()}"

        depth, height, width = tensor_a.size()[2:]
        assert depth == height == width, "Input tensors must be cubes."

        total = 0.0
        term_weight = 1
        for target_size in self._target_sizes:
            kernel = depth // target_size
            cell_volume = kernel**3
            summed_a = F.avg_pool3d(tensor_a, kernel) * cell_volume
            summed_b = F.avg_pool3d(tensor_b, kernel) * cell_volume

            total += (summed_a - summed_b).abs().mean() * term_weight
            term_weight *= self._damping

        return total
|
||||
19
src/editor/training/random_edit.py
Normal file
19
src/editor/training/random_edit.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
from PIL import Image, ImageEnhance
|
||||
from ..utils import random, get_colour_lut, apply_pixel_shader
|
||||
from ..operations import add_noise, add_random_colour_spill
|
||||
import numpy as np
|
||||
|
||||
|
||||
def random_edit(img: Image, seed: int = 42) -> Image:
    """Apply a seeded chain of random edits and return the result in RGB.

    The global NumPy random generator is re-seeded with ``seed``, then the
    image gets noise, a contrast change, a colour spill, and - in HSV space -
    random lookup tables applied to saturation and value.
    """
    np.random.seed(seed)

    noise_strength = random(0, 0.2)
    img = add_noise(img, noise_strength)

    contrast = ImageEnhance.Contrast(img)
    img = contrast.enhance(random(0.5, 2))

    img = add_random_colour_spill(img, 1.3)

    hsv_image = img.convert("HSV")
    saturation_lut = get_colour_lut(variance=0.3, count=5, type="linear")
    brightness_lut = get_colour_lut(variance=0.3, count=5, type="cubic")

    def remap(h, s, v):
        # Hue is left alone; saturation and value go through their tables.
        return (h, saturation_lut[s], brightness_lut[v])

    hsv_image = apply_pixel_shader(hsv_image, remap)
    return hsv_image.convert("RGB")
|
||||
7
src/editor/utils/__init__.py
Normal file
7
src/editor/utils/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
from .interpolate import interpolate
|
||||
from .random import random
|
||||
from .apply_pixel_shader import apply_pixel_shader
|
||||
from .get_colour_lut import get_colour_lut
|
||||
from .compute_histogram import compute_histogram
|
||||
from .kldiv import kldiv
|
||||
from .generate_rotation_matrices import generate_rotation_matrices
|
||||
14
src/editor/utils/apply_pixel_shader.py
Normal file
14
src/editor/utils/apply_pixel_shader.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
from typing import Callable, Tuple
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def apply_pixel_shader(
    img: Image, callback: Callable[[int, int, int], Tuple[int, int, int]]
):
    """Replace every pixel of ``img`` in place by ``callback`` of its channels.

    Pixels are visited column by column. Each pixel must unpack into exactly
    three values, which are passed to ``callback`` as separate arguments.
    The same (modified) image object is returned.
    """
    pixel_access = img.load()
    column_count, row_count = img.size
    for column in range(column_count):
        for row in range(row_count):
            first, second, third = pixel_access[column, row]
            pixel_access[column, row] = callback(first, second, third)
    return img
|
||||
22
src/editor/utils/compute_histogram.py
Normal file
22
src/editor/utils/compute_histogram.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
|
||||
def compute_histogram(
    image: Image.Image | np.ndarray,
    bins: int,
    value_range=(0, 256),
    normalize: bool = True,
) -> np.ndarray:
    """Return the joint 3D histogram of the pixel triples of ``image``.

    A PIL image is converted to an array first. The pixels are reshaped to
    ``(-1, 3)`` and binned with ``bins`` bins per axis over ``value_range``.
    The counts are returned as ``float32``, divided by their sum when
    ``normalize`` is true.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    pixel_triples = image.reshape(-1, 3)
    counts, _edges = np.histogramdd(
        pixel_triples, bins=bins, range=[value_range] * 3
    )
    counts = counts.astype(np.float32)

    if not normalize:
        return counts
    return counts / np.sum(counts)
|
||||
66
src/editor/utils/generate_rotation_matrices.py
Normal file
66
src/editor/utils/generate_rotation_matrices.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
from random import shuffle
|
||||
from typing import List, Tuple
|
||||
import numpy as np
|
||||
from functools import lru_cache
|
||||
from numpy.typing import NDArray
|
||||
|
||||
|
||||
@lru_cache
def generate_rotation_matrices(count: int) -> List[NDArray[np.float64]]:
    """Return ``count`` validated 3x3 rotation matrices (cached per ``count``).

    The rotation axes come from ``fibonacci_sphere`` and are shuffled; the
    angles are ``count`` evenly spaced values in ``[0, 2*pi)``. Every matrix
    is checked with ``_check_rotation_matrix`` before the list is returned.
    """
    rotation_axes = fibonacci_sphere(count)
    shuffle(rotation_axes)
    rotation_angles = np.linspace(0, 2 * np.pi, count, endpoint=False)

    rotations = list(map(_rotation_matrix, rotation_axes, rotation_angles))
    for rotation in rotations:
        _check_rotation_matrix(rotation)
    return rotations
|
||||
|
||||
|
||||
def fibonacci_sphere(samples: int) -> List[Tuple[float, float, float]]:
    """Return ``samples`` points on the unit sphere along a golden-angle spiral.

    ``y`` runs linearly from 1 to -1 and every point is turned by the golden
    angle relative to the previous one. Each point is an ``[x, y, z]`` list.

    A single sample yields just the first point of the spiral, ``[0, 1, 0]``
    (this case previously raised ``ZeroDivisionError``); ``samples <= 0``
    yields an empty list.
    """
    golden_angle = np.pi * (3.0 - np.sqrt(5.0))  # Golden angle in radians
    last_index = samples - 1

    points = []
    for i in range(samples):
        # Fraction of the way from the first to the last point; with a single
        # sample there is no span to divide by, so stay at the start.
        fraction = i / float(last_index) if last_index > 0 else 0.0
        y = 1 - fraction * 2  # y goes from 1 to -1
        radius = np.sqrt(1 - y * y)  # radius of the circle at height y

        theta = golden_angle * i  # golden angle increment
        points.append([np.cos(theta) * radius, y, np.sin(theta) * radius])
    return points
|
||||
|
||||
|
||||
def _rotation_matrix(
|
||||
axis: Tuple[float, float, float], theta: float
|
||||
) -> NDArray[np.float64]:
|
||||
axis = np.asarray(axis)
|
||||
axis = axis / np.sqrt(np.dot(axis, axis))
|
||||
a = np.cos(theta / 2.0)
|
||||
b, c, d = -axis * np.sin(theta / 2.0)
|
||||
|
||||
aa, bb, cc, dd = a * a, b * b, c * c, d * d
|
||||
bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
|
||||
return np.array(
|
||||
[
|
||||
[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
|
||||
[2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
|
||||
[2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc],
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def _check_rotation_matrix(R: NDArray[np.float64]):
|
||||
# Check if the matrix is square
|
||||
if R.shape != (3, 3):
|
||||
raise ValueError("Matrix must be 3x3.")
|
||||
|
||||
# Check orthogonality: R.T * R should be close to the identity matrix
|
||||
I = np.eye(3)
|
||||
if not np.allclose(np.dot(R.T, R), I):
|
||||
raise ValueError("allclose")
|
||||
|
||||
# Check determinant: Should be +1
|
||||
if not np.isclose(np.linalg.det(R), 1.0):
|
||||
raise ValueError(f"det {np.linalg.det(R)}")
|
||||
21
src/editor/utils/get_colour_lut.py
Normal file
21
src/editor/utils/get_colour_lut.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import numpy as np
|
||||
from typing import List
|
||||
from .random import random
|
||||
from .interpolate import interpolate, INTERPOLATION_TYPE
|
||||
|
||||
|
||||
def get_edit_points(variance: float, count: int) -> List[float]:
    """Return ``count`` randomly perturbed points spread over ``[0, 1]``.

    Point ``i`` is drawn with ``random`` from the interval of half-width
    ``variance`` centred on ``i / (count - 1)``.
    """
    points = []
    for i in range(count):
        centre = i / (count - 1)
        points.append(random(centre - variance, centre + variance))
    return points
|
||||
|
||||
|
||||
def get_colour_lut(
    variance=0.1, count=5, type: INTERPOLATION_TYPE = "cubic"
) -> List[int]:
    """Return a random 256-entry lookup table.

    ``count`` edit points are drawn with ``get_edit_points``; entry ``i`` of
    the table is the interpolation of those points at ``i / 255``, multiplied
    by 255 and rounded.
    """
    edit_points = get_edit_points(variance=variance, count=count)

    lut = []
    for level in np.linspace(0, 255, 256):
        position = level / 255
        lut.append(round(interpolate(edit_points, position, type=type) * 255))
    return lut
|
||||
35
src/editor/utils/interpolate.py
Normal file
35
src/editor/utils/interpolate.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import numpy as np
|
||||
from scipy.interpolate import CubicSpline
|
||||
from typing import List, Literal
|
||||
|
||||
|
||||
INTERPOLATION_TYPE = Literal["cubic", "linear"]


def interpolate(
    control_points: List[float], t: float, type: INTERPOLATION_TYPE
) -> float:
    """Evaluate a curve through the sorted ``control_points`` at ``t``.

    The control points are sorted ascending and placed at evenly spaced
    positions on ``[0, 1]``. ``"cubic"`` evaluates a ``CubicSpline`` through
    them; ``"linear"`` interpolates linearly inside the segment containing
    ``t`` (``t == 1`` returns the last point).

    Raises:
        ValueError: If ``type`` is neither ``"cubic"`` nor ``"linear"``.
    """
    ordered_points = sorted(control_points)

    if type == "cubic":
        knots = np.linspace(0, 1, len(ordered_points))
        return CubicSpline(knots, ordered_points)(t)

    if type != "linear":
        raise ValueError("Invalid type")

    segment_count = len(ordered_points) - 1
    knots = np.linspace(0, 1, segment_count + 1)
    # Index of the knot at or immediately left of ``t``.
    left = np.searchsorted(knots, t, side="right") - 1

    if t == 1:
        return ordered_points[-1]

    segment_width = knots[left + 1] - knots[left]
    local_t = (t - knots[left]) / segment_width
    rise = ordered_points[left + 1] - ordered_points[left]
    return ordered_points[left] + local_t * rise
|
||||
11
src/editor/utils/kldiv.py
Normal file
11
src/editor/utils/kldiv.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def kldiv(P: np.ndarray, Q: np.ndarray) -> float:
    """Return the Kullback-Leibler divergence ``sum(P * log(P / Q))``.

    Both inputs are normalised to sum to 1 and every entry is raised to at
    least machine epsilon so that the logarithm and the division are defined.

    The normalisation works on copies: the caller's arrays are left untouched
    (the previous in-place ``/=`` rescaled them and failed for integer arrays).
    """
    p = np.asarray(P, dtype=float)
    q = np.asarray(Q, dtype=float)
    p = p / p.sum()
    q = q / q.sum()

    tiny = np.finfo(float).eps
    p_safe = np.maximum(p, tiny)
    q_safe = np.maximum(q, tiny)

    return float(np.sum(p_safe * np.log(p_safe / q_safe)))
|
||||
10
src/editor/utils/random.py
Normal file
10
src/editor/utils/random.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def random(min: float = 0, max: float = 1):
    """Draw one normally distributed sample clipped to ``[min, max]``.

    The mean is the midpoint of the interval and the standard deviation is
    one sixth of its width, so the bounds sit three standard deviations from
    the mean; samples beyond them are clipped to the bound.
    """
    midpoint = (max + min) / 2
    spread = (max - min) / 6
    draw = np.random.normal(midpoint, spread)
    return np.clip(draw, min, max)
|
||||
3
src/editor/visualisation/__init__.py
Normal file
3
src/editor/visualisation/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .display_images import display_images
|
||||
from .plot_histograms_in_3d import plot_histograms_in_3d
|
||||
from .plot_histograms_in_2d import plot_histograms_in_2d
|
||||
25
src/editor/visualisation/display_images.py
Normal file
25
src/editor/visualisation/display_images.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import matplotlib.pyplot as plt
|
||||
from typing import Dict
|
||||
from PIL.Image import Image
|
||||
from math import ceil
|
||||
|
||||
|
||||
def display_images(images: Dict[str, Image], images_per_row: int = 3):
    """Show the given images in a grid, using the dict keys as titles.

    Args:
        images: Mapping from title to image, displayed in insertion order.
        images_per_row: Maximum number of images per grid row.
    """
    fig, axes = plt.subplots(
        nrows=ceil(len(images) / images_per_row),
        ncols=min(images_per_row, len(images)),
        figsize=(12, 8),
        # Always get a 2D array of Axes back. With the default squeezing a
        # 1x1 grid returns a bare Axes object, so a single image failed on
        # ``flatten``.
        squeeze=False,
    )

    axes = axes.flatten()

    for i, (title, image) in enumerate(images.items()):
        axes[i].imshow(image)
        axes[i].axis("off")
        axes[i].set_title(title)

    # Hide the unused cells of the last row.
    for i in range(len(images), len(axes)):
        axes[i].axis("off")

    plt.tight_layout()
    plt.show()
|
||||
32
src/editor/visualisation/plot_histograms_in_2d.py
Normal file
32
src/editor/visualisation/plot_histograms_in_2d.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from typing import Dict
|
||||
|
||||
|
||||
def plot_histograms_in_2d(histograms: Dict[str, np.ndarray]):
    """Draw each 3D histogram as a static matplotlib 3D scatter plot.

    Every bin becomes one marker whose size is the bin value times 5000 and
    whose colour is the bin's own (x, y, z) index scaled to ``[0, 1]``.
    The plots share a single row, which has three columns as before and grows
    when more than three histograms are passed (that case previously raised
    in ``add_subplot``).

    Returns:
        The matplotlib figure.
    """
    fig = plt.figure(figsize=(15, 5))
    column_count = max(3, len(histograms))

    for i, (title, histogram) in enumerate(histograms.items(), 1):
        ax = fig.add_subplot(1, column_count, i, projection="3d")

        size = histogram.shape[0]

        x, y, z = np.indices(histogram.shape)
        x = x.flatten()
        y = y.flatten()
        z = z.flatten()
        values = histogram.flatten()

        sizes = values * 5000

        # max(..., 1) avoids 0/0 colours for a single-bin histogram.
        colors = np.vstack((x, y, z)).T / max(size - 1, 1)

        ax.scatter(x, y, z, c=colors, s=sizes, marker="o", alpha=0.5)

        ax.set_xlim([0, (size - 1)])
        ax.set_ylim([0, (size - 1)])
        ax.set_zlim([0, (size - 1)])
        ax.set_title(title)

    return fig
|
||||
62
src/editor/visualisation/plot_histograms_in_3d.py
Normal file
62
src/editor/visualisation/plot_histograms_in_3d.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
from plotly.subplots import make_subplots
|
||||
import plotly.graph_objects as go
|
||||
from math import ceil
|
||||
from typing import Dict
|
||||
import numpy as np
|
||||
|
||||
|
||||
def plot_histograms_in_3d(
    histograms: Dict[str, np.ndarray], histogram_per_row: int = 3
):
    """Show each histogram as an interactive plotly 3D scatter in a grid.

    Args:
        histograms: Mapping from trace name to 3D histogram.
        histogram_per_row: Maximum number of plots per grid row.
    """
    cols = min(histogram_per_row, len(histograms))
    rows = ceil(len(histograms) / histogram_per_row)
    fig = make_subplots(
        rows=rows,
        cols=cols,
        specs=[[{"type": "scatter3d"} for _ in range(cols)] for _ in range(rows)],
    )

    for i, (title, histogram) in enumerate(histograms.items()):
        fig.add_trace(
            _get_3d_scatter_plot_from_histogram(title, histogram),
            # Row-major placement: a new row starts every ``histogram_per_row``
            # plots. The previous divisor ``histogram_per_row + 1`` kept the
            # first plot of each later row in the row above, on top of an
            # existing plot.
            row=(i // histogram_per_row) + 1,
            col=(i % histogram_per_row) + 1,
        )

    # Same initial camera position for every subplot scene.
    scenes = {
        f"scene{i}": dict(camera=dict(eye=dict(x=0.1, y=0, z=2)))
        for i in range(1, len(histograms) + 1)
    }
    fig.update_layout(**scenes)
    fig.show()
|
||||
|
||||
|
||||
def _get_3d_scatter_plot_from_histogram(title, histogram):
    """Build a ``Scatter3d`` trace with one marker per non-empty bin.

    Marker positions are the bin indices, marker sizes are the bin values
    times 10000 capped at 20, and each marker is coloured by its own indices
    scaled by ``256 / bins``.
    """
    bins = len(histogram)

    # (i, j, k, value) for every bin holding a positive value, in index order.
    occupied = [
        (i, j, k, value)
        for i, plane in enumerate(histogram)
        for j, line in enumerate(plane)
        for k, value in enumerate(line)
        if value > 0
    ]
    x = [entry[0] for entry in occupied]
    y = [entry[1] for entry in occupied]
    z = [entry[2] for entry in occupied]
    marker_size = [entry[3] for entry in occupied]

    marker = dict(
        size=[min(20, ms * 10000) for ms in marker_size],
        color=[
            f"rgb({xi*256/bins},{yi*256/bins},{zi*256/bins})"
            for xi, yi, zi in zip(x, y, z)
        ],
        opacity=1,
        line=dict(width=0),
    )
    return go.Scatter3d(x=x, y=y, z=z, mode="markers", marker=marker, name=title)
|
||||
821
src/fetch_from_unsplash.ipynb
Normal file
821
src/fetch_from_unsplash.ipynb
Normal file
|
|
@ -0,0 +1,821 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>photo_id</th>\n",
|
||||
" <th>photo_url</th>\n",
|
||||
" <th>photo_image_url</th>\n",
|
||||
" <th>photo_submitted_at</th>\n",
|
||||
" <th>photo_featured</th>\n",
|
||||
" <th>photo_width</th>\n",
|
||||
" <th>photo_height</th>\n",
|
||||
" <th>photo_aspect_ratio</th>\n",
|
||||
" <th>photo_description</th>\n",
|
||||
" <th>photographer_username</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>photo_location_country</th>\n",
|
||||
" <th>photo_location_city</th>\n",
|
||||
" <th>stats_views</th>\n",
|
||||
" <th>stats_downloads</th>\n",
|
||||
" <th>ai_description</th>\n",
|
||||
" <th>ai_primary_landmark_name</th>\n",
|
||||
" <th>ai_primary_landmark_latitude</th>\n",
|
||||
" <th>ai_primary_landmark_longitude</th>\n",
|
||||
" <th>ai_primary_landmark_confidence</th>\n",
|
||||
" <th>blur_hash</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>3690</th>\n",
|
||||
" <td>XFmznQhx9lM</td>\n",
|
||||
" <td>https://unsplash.com/photos/XFmznQhx9lM</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-156347321301...</td>\n",
|
||||
" <td>2019-07-18 18:07:14.031684</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>4443</td>\n",
|
||||
" <td>2962</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>Fall color in the countryside of Eastern Washi...</td>\n",
|
||||
" <td>timothyeberly</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2978748547</td>\n",
|
||||
" <td>304950</td>\n",
|
||||
" <td>orange leaf trees</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LBJPSa4o0hW?pI4;-.R*E459O?sk</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6103</th>\n",
|
||||
" <td>YDNvydD1jAY</td>\n",
|
||||
" <td>https://unsplash.com/photos/YDNvydD1jAY</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-149034936815...</td>\n",
|
||||
" <td>2017-03-24 09:56:57.505262</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>4500</td>\n",
|
||||
" <td>3000</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>Flowers in spring</td>\n",
|
||||
" <td>maartendeckers</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>Belgium</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2722857886</td>\n",
|
||||
" <td>416983</td>\n",
|
||||
" <td>pink, yellow and brown petaled flowers</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LQJInG*JMyIm^ROpxbNFyCNGnln4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3389</th>\n",
|
||||
" <td>4oovIxttThA</td>\n",
|
||||
" <td>https://unsplash.com/photos/4oovIxttThA</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-1560850038-f...</td>\n",
|
||||
" <td>2019-06-18 09:36:35.94311</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>5025</td>\n",
|
||||
" <td>3141</td>\n",
|
||||
" <td>1.60</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>a8ka</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2190084956</td>\n",
|
||||
" <td>253730</td>\n",
|
||||
" <td>aerial view of houses near ocean</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LaCt8}~BwNIpozoLofofWBWBaef6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18789</th>\n",
|
||||
" <td>BkR842UVXqk</td>\n",
|
||||
" <td>https://unsplash.com/photos/BkR842UVXqk</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-1558816280-d...</td>\n",
|
||||
" <td>2019-05-25 20:32:08.153319</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>4000</td>\n",
|
||||
" <td>6000</td>\n",
|
||||
" <td>0.67</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>olenkasergienko</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1934025254</td>\n",
|
||||
" <td>294785</td>\n",
|
||||
" <td>pink petaled flower</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LA71AxX50_xHt7j[S1ju0_nm^8NZ</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>21742</th>\n",
|
||||
" <td>GRLN5FC4cLg</td>\n",
|
||||
" <td>https://unsplash.com/photos/GRLN5FC4cLg</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-1552300977-c...</td>\n",
|
||||
" <td>2019-03-11 10:50:25.9311</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>2992</td>\n",
|
||||
" <td>3992</td>\n",
|
||||
" <td>0.75</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>turner_imagery</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1916027735</td>\n",
|
||||
" <td>306073</td>\n",
|
||||
" <td>high angle photography of cliff</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LKCr=#~VNat7X-%M%1j?9tNbxaay</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3272</th>\n",
|
||||
" <td>5zsw1PjXg8k</td>\n",
|
||||
" <td>https://unsplash.com/photos/5zsw1PjXg8k</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-142372152343...</td>\n",
|
||||
" <td>2015-02-12 06:12:09.092905</td>\n",
|
||||
" <td>f</td>\n",
|
||||
" <td>2448</td>\n",
|
||||
" <td>3264</td>\n",
|
||||
" <td>0.75</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>melissaaskew</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>14088</td>\n",
|
||||
" <td>812</td>\n",
|
||||
" <td>waterfalls in the middle of the forest</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LG8#NK.84m4mt6f#RjkD9EM_%N-=</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3273</th>\n",
|
||||
" <td>gqa-fnYASIQ</td>\n",
|
||||
" <td>https://unsplash.com/photos/gqa-fnYASIQ</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-142302719730...</td>\n",
|
||||
" <td>2015-02-04 05:19:59.869141</td>\n",
|
||||
" <td>f</td>\n",
|
||||
" <td>5086</td>\n",
|
||||
" <td>3391</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>wilstewart3</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>13384</td>\n",
|
||||
" <td>858</td>\n",
|
||||
" <td>a street sign sitting on the side of a body of...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LvKKi,RjM{j[_NWBWBfk5EoLoeaz</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>12956</th>\n",
|
||||
" <td>Cq62qvCW8bM</td>\n",
|
||||
" <td>https://unsplash.com/photos/Cq62qvCW8bM</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-142245228993...</td>\n",
|
||||
" <td>2015-01-28 13:38:18.071331</td>\n",
|
||||
" <td>f</td>\n",
|
||||
" <td>4896</td>\n",
|
||||
" <td>3264</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>kseny</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>13068</td>\n",
|
||||
" <td>1054</td>\n",
|
||||
" <td>man in black shirt and blue pants sitting on b...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LGEW2ko~M{%N0;ofnhRkwvozt8of</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17074</th>\n",
|
||||
" <td>Py8vZdCw35U</td>\n",
|
||||
" <td>https://unsplash.com/photos/Py8vZdCw35U</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-142034363140...</td>\n",
|
||||
" <td>2015-01-04 03:54:41.031772</td>\n",
|
||||
" <td>f</td>\n",
|
||||
" <td>2320</td>\n",
|
||||
" <td>1553</td>\n",
|
||||
" <td>1.49</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>mrbrodeur</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>12617</td>\n",
|
||||
" <td>581</td>\n",
|
||||
" <td>man in black jacket standing on brown sand und...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LuI~ZRogaxR*0?Rjofj[Mxs.a|fP</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1394</th>\n",
|
||||
" <td>SpRN0qZPLr8</td>\n",
|
||||
" <td>https://unsplash.com/photos/SpRN0qZPLr8</td>\n",
|
||||
" <td>https://images.unsplash.com/photo-141621393610...</td>\n",
|
||||
" <td>2014-11-17 08:47:33.427134</td>\n",
|
||||
" <td>f</td>\n",
|
||||
" <td>6016</td>\n",
|
||||
" <td>4000</td>\n",
|
||||
" <td>1.50</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>tarunccet</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>8266</td>\n",
|
||||
" <td>101</td>\n",
|
||||
" <td>brown wooden house on green grass field near b...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>LjEppYn}j]kC%jj[f6f6x8fPaxay</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>25000 rows × 31 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" photo_id photo_url \\\n",
|
||||
"3690 XFmznQhx9lM https://unsplash.com/photos/XFmznQhx9lM \n",
|
||||
"6103 YDNvydD1jAY https://unsplash.com/photos/YDNvydD1jAY \n",
|
||||
"3389 4oovIxttThA https://unsplash.com/photos/4oovIxttThA \n",
|
||||
"18789 BkR842UVXqk https://unsplash.com/photos/BkR842UVXqk \n",
|
||||
"21742 GRLN5FC4cLg https://unsplash.com/photos/GRLN5FC4cLg \n",
|
||||
"... ... ... \n",
|
||||
"3272 5zsw1PjXg8k https://unsplash.com/photos/5zsw1PjXg8k \n",
|
||||
"3273 gqa-fnYASIQ https://unsplash.com/photos/gqa-fnYASIQ \n",
|
||||
"12956 Cq62qvCW8bM https://unsplash.com/photos/Cq62qvCW8bM \n",
|
||||
"17074 Py8vZdCw35U https://unsplash.com/photos/Py8vZdCw35U \n",
|
||||
"1394 SpRN0qZPLr8 https://unsplash.com/photos/SpRN0qZPLr8 \n",
|
||||
"\n",
|
||||
" photo_image_url \\\n",
|
||||
"3690 https://images.unsplash.com/photo-156347321301... \n",
|
||||
"6103 https://images.unsplash.com/photo-149034936815... \n",
|
||||
"3389 https://images.unsplash.com/photo-1560850038-f... \n",
|
||||
"18789 https://images.unsplash.com/photo-1558816280-d... \n",
|
||||
"21742 https://images.unsplash.com/photo-1552300977-c... \n",
|
||||
"... ... \n",
|
||||
"3272 https://images.unsplash.com/photo-142372152343... \n",
|
||||
"3273 https://images.unsplash.com/photo-142302719730... \n",
|
||||
"12956 https://images.unsplash.com/photo-142245228993... \n",
|
||||
"17074 https://images.unsplash.com/photo-142034363140... \n",
|
||||
"1394 https://images.unsplash.com/photo-141621393610... \n",
|
||||
"\n",
|
||||
" photo_submitted_at photo_featured photo_width photo_height \\\n",
|
||||
"3690 2019-07-18 18:07:14.031684 t 4443 2962 \n",
|
||||
"6103 2017-03-24 09:56:57.505262 t 4500 3000 \n",
|
||||
"3389 2019-06-18 09:36:35.94311 t 5025 3141 \n",
|
||||
"18789 2019-05-25 20:32:08.153319 t 4000 6000 \n",
|
||||
"21742 2019-03-11 10:50:25.9311 t 2992 3992 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"3272 2015-02-12 06:12:09.092905 f 2448 3264 \n",
|
||||
"3273 2015-02-04 05:19:59.869141 f 5086 3391 \n",
|
||||
"12956 2015-01-28 13:38:18.071331 f 4896 3264 \n",
|
||||
"17074 2015-01-04 03:54:41.031772 f 2320 1553 \n",
|
||||
"1394 2014-11-17 08:47:33.427134 f 6016 4000 \n",
|
||||
"\n",
|
||||
" photo_aspect_ratio photo_description \\\n",
|
||||
"3690 1.50 Fall color in the countryside of Eastern Washi... \n",
|
||||
"6103 1.50 Flowers in spring \n",
|
||||
"3389 1.60 NaN \n",
|
||||
"18789 0.67 NaN \n",
|
||||
"21742 0.75 NaN \n",
|
||||
"... ... ... \n",
|
||||
"3272 0.75 NaN \n",
|
||||
"3273 1.50 NaN \n",
|
||||
"12956 1.50 NaN \n",
|
||||
"17074 1.49 NaN \n",
|
||||
"1394 1.50 NaN \n",
|
||||
"\n",
|
||||
" photographer_username ... photo_location_country photo_location_city \\\n",
|
||||
"3690 timothyeberly ... NaN NaN \n",
|
||||
"6103 maartendeckers ... Belgium NaN \n",
|
||||
"3389 a8ka ... NaN NaN \n",
|
||||
"18789 olenkasergienko ... NaN NaN \n",
|
||||
"21742 turner_imagery ... NaN NaN \n",
|
||||
"... ... ... ... ... \n",
|
||||
"3272 melissaaskew ... NaN NaN \n",
|
||||
"3273 wilstewart3 ... NaN NaN \n",
|
||||
"12956 kseny ... NaN NaN \n",
|
||||
"17074 mrbrodeur ... NaN NaN \n",
|
||||
"1394 tarunccet ... NaN NaN \n",
|
||||
"\n",
|
||||
" stats_views stats_downloads \\\n",
|
||||
"3690 2978748547 304950 \n",
|
||||
"6103 2722857886 416983 \n",
|
||||
"3389 2190084956 253730 \n",
|
||||
"18789 1934025254 294785 \n",
|
||||
"21742 1916027735 306073 \n",
|
||||
"... ... ... \n",
|
||||
"3272 14088 812 \n",
|
||||
"3273 13384 858 \n",
|
||||
"12956 13068 1054 \n",
|
||||
"17074 12617 581 \n",
|
||||
"1394 8266 101 \n",
|
||||
"\n",
|
||||
" ai_description \\\n",
|
||||
"3690 orange leaf trees \n",
|
||||
"6103 pink, yellow and brown petaled flowers \n",
|
||||
"3389 aerial view of houses near ocean \n",
|
||||
"18789 pink petaled flower \n",
|
||||
"21742 high angle photography of cliff \n",
|
||||
"... ... \n",
|
||||
"3272 waterfalls in the middle of the forest \n",
|
||||
"3273 a street sign sitting on the side of a body of... \n",
|
||||
"12956 man in black shirt and blue pants sitting on b... \n",
|
||||
"17074 man in black jacket standing on brown sand und... \n",
|
||||
"1394 brown wooden house on green grass field near b... \n",
|
||||
"\n",
|
||||
" ai_primary_landmark_name ai_primary_landmark_latitude \\\n",
|
||||
"3690 NaN NaN \n",
|
||||
"6103 NaN NaN \n",
|
||||
"3389 NaN NaN \n",
|
||||
"18789 NaN NaN \n",
|
||||
"21742 NaN NaN \n",
|
||||
"... ... ... \n",
|
||||
"3272 NaN NaN \n",
|
||||
"3273 NaN NaN \n",
|
||||
"12956 NaN NaN \n",
|
||||
"17074 NaN NaN \n",
|
||||
"1394 NaN NaN \n",
|
||||
"\n",
|
||||
" ai_primary_landmark_longitude ai_primary_landmark_confidence \\\n",
|
||||
"3690 NaN NaN \n",
|
||||
"6103 NaN NaN \n",
|
||||
"3389 NaN NaN \n",
|
||||
"18789 NaN NaN \n",
|
||||
"21742 NaN NaN \n",
|
||||
"... ... ... \n",
|
||||
"3272 NaN NaN \n",
|
||||
"3273 NaN NaN \n",
|
||||
"12956 NaN NaN \n",
|
||||
"17074 NaN NaN \n",
|
||||
"1394 NaN NaN \n",
|
||||
"\n",
|
||||
" blur_hash \n",
|
||||
"3690 LBJPSa4o0hW?pI4;-.R*E459O?sk \n",
|
||||
"6103 LQJInG*JMyIm^ROpxbNFyCNGnln4 \n",
|
||||
"3389 LaCt8}~BwNIpozoLofofWBWBaef6 \n",
|
||||
"18789 LA71AxX50_xHt7j[S1ju0_nm^8NZ \n",
|
||||
"21742 LKCr=#~VNat7X-%M%1j?9tNbxaay \n",
|
||||
"... ... \n",
|
||||
"3272 LG8#NK.84m4mt6f#RjkD9EM_%N-= \n",
|
||||
"3273 LvKKi,RjM{j[_NWBWBfk5EoLoeaz \n",
|
||||
"12956 LGEW2ko~M{%N0;ofnhRkwvozt8of \n",
|
||||
"17074 LuI~ZRogaxR*0?Rjofj[Mxs.a|fP \n",
|
||||
"1394 LjEppYn}j]kC%jj[f6f6x8fPaxay \n",
|
||||
"\n",
|
||||
"[25000 rows x 31 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"DATA_PATH = Path('/mnt/wsl/PHYSICALDRIVE1/data/unsplash')\n",
|
||||
"DATA_PATH.mkdir(exist_ok=True, parents=True)\n",
|
||||
"\n",
|
||||
"unsplash_dataset_path = \"/home/andras/projects/bipolaroid/unsplash-research-dataset-lite-latest/photos.tsv000\"\n",
|
||||
"unsplash_dataset = pd.read_csv(unsplash_dataset_path, sep=\"\\t\")\n",
|
||||
"unsplash_dataset.sort_values(by=\"stats_views\", ascending=False, inplace=True)\n",
|
||||
"unsplash_dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0%| | 113/25000 [00:30<1:02:36, 6.62it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading toPRrcyAIUY: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 1%| | 184/25000 [00:48<1:19:49, 5.18it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading t7YycgAoVSw: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 1%| | 219/25000 [00:58<1:40:16, 4.12it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading LOlMe8HfofI: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 3%|▎ | 744/25000 [03:24<1:18:52, 5.12it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48c37350>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7fa9f50686e0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48df49e0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48df7b60>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48dc81d0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48dcac90>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48df5850>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48df7cb0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48c351f0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading vigsqYux_-8: HTTPSConnectionPool(host='images.unsplash.com_thebeach.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7fa9f4f6b4a0>: Failed to resolve 'images.unsplash.com_thebeach.jpg' ([Errno -2] Name or service not known)\"))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 12%|█▏ | 2885/25000 [12:37<1:37:39, 3.77it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481fd370>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa483a9580>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48380560>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48382ba0>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48381940>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa483828a0>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa483aaf60>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481fee10>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7fa9f56cbfb0>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading 9_9hzZVjV8s: HTTPSConnectionPool(host='images.unsplash.company', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48383920>: Failed to resolve 'images.unsplash.company' ([Errno -2] Name or service not known)\"))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 12%|█▏ | 2909/25000 [12:43<1:15:43, 4.86it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading NcociWzk23A: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 14%|█▍ | 3505/25000 [15:13<1:46:00, 3.38it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481af4d0>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481ad160>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481943b0>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa4818bcb0>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48189fa0>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48189a00>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa48196c60>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481ac710>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7faa481693a0>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n",
|
||||
"Error downloading rsJtMXn3p_c: HTTPSConnectionPool(host='images.unsplash.com-grass-sun.jpg', port=443): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPSConnection object at 0x7fa9f56e0110>: Failed to resolve 'images.unsplash.com-grass-sun.jpg' ([Errno -2] Name or service not known)\"))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 29%|██▉ | 7352/25000 [31:15<1:03:57, 4.60it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading T2LEdBxpm54: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 34%|███▎ | 8375/25000 [35:53<1:13:19, 3.78it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading q_4pIVaXPEk: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 34%|███▍ | 8568/25000 [36:40<1:08:17, 4.01it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading iGANt1N2ge8: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out. (read timeout=10)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 37%|███▋ | 9282/25000 [40:24<1:41:11, 2.59it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading 2FqpN2CWCLo: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 37%|███▋ | 9343/25000 [40:45<1:22:43, 3.15it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading 4T7-GLBDLKE: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 38%|███▊ | 9398/25000 [41:03<1:43:40, 2.51it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading i-xtI6jD7bQ: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 40%|███▉ | 9992/25000 [44:16<1:03:56, 3.91it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading 0GBafJ-ZenA: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out. (read timeout=10)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 46%|████▌ | 11508/25000 [51:29<1:16:16, 2.95it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading k2RWB_aPfqI: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 47%|████▋ | 11626/25000 [52:01<36:59, 6.03it/s] "
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading 7ICXVb10NJs: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 47%|████▋ | 11655/25000 [52:08<1:05:47, 3.38it/s]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading PgBTaq-AgVI: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 62%|██████▏ | 15477/25000 [1:10:12<47:47, 3.32it/s] "
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading zS_b76LrEL8: HTTPSConnectionPool(host='images.unsplash.com', port=443): Read timed out.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 70%|██████▉ | 17470/25000 [1:19:44<19:51, 6.32it/s] "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import concurrent.futures\n",
|
||||
"import requests\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"progress = tqdm(total=len(unsplash_dataset))\n",
|
||||
"\n",
|
||||
"def download_image(row):\n",
|
||||
" filename = DATA_PATH / f\"{row['photo_id']}.jpg\"\n",
|
||||
" for _ in range(10):\n",
|
||||
" try:\n",
|
||||
" response = requests.get(row[\"photo_image_url\"], timeout=10)\n",
|
||||
" with open(filename, \"wb\") as f:\n",
|
||||
" f.write(response.content)\n",
|
||||
" with progress.get_lock():\n",
|
||||
" progress.update(1)\n",
|
||||
" break\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error downloading {row['photo_id']}: {e}\")\n",
|
||||
"\n",
|
||||
"with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:\n",
|
||||
" futures: List[concurrent.futures.Future] = []\n",
|
||||
" for row in unsplash_dataset.iterrows():\n",
|
||||
" row = row[1]\n",
|
||||
" future = executor.submit(download_image, row)\n",
|
||||
" futures.append(future)\n",
|
||||
"\n",
|
||||
" progress.display()\n",
|
||||
" concurrent.futures.wait(futures)\n",
|
||||
"progress.close()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "bipolaroid",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
109
src/laion.ipynb
Normal file
109
src/laion.ipynb
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6104.95it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6988.74it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6957.73it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6734.31it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7696.85it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7331.94it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6240.69it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7451.37it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7135.27it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3855.91it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3567.51it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2853.24it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6952.67it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6177.45it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3130.18it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3303.45it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3662.39it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2754.25it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6633.24it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6548.62it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 4601.06it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2288.88it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3635.54it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2179.42it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6750.76it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6691.62it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 5768.00it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3440.06it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2743.69it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3034.45it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:02<00:00, 1261.15it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6129.07it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6573.12it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 6425.97it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2865.05it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 4130.32it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3020.61it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:02<00:00, 1446.82it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 8095.71it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7679.18it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7918.50it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 3519.17it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3258.94it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 2436.68it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:03<00:00, 1000.79it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7625.18it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7752.86it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:00<00:00, 7538.78it/s]\n",
|
||||
"100%|██████████| 3439/3439 [00:01<00:00, 3115.93it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from pathlib import Path\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import hashlib\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"DATA_PATH = Path('/mnt/wsl/PHYSICALDRIVE1/data/laion')\n",
|
||||
"DATA_PATH.mkdir(exist_ok=True, parents=True)\n",
|
||||
"\n",
|
||||
"LAION_PATH = Path('/home/andras/projects/laion_improved_aesthetics_6.5plus_with_images/data')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for file in LAION_PATH.glob(\"*.parquet\"):\n",
|
||||
" df = pd.read_parquet(file)\n",
|
||||
" for row in tqdm(list(df.iterrows())):\n",
|
||||
" row = row[1]\n",
|
||||
" bytes = row['image']['bytes']\n",
|
||||
" digest = hashlib.sha1(bytes).hexdigest()\n",
|
||||
" with open(DATA_PATH / f\"{digest}.jpg\", 'wb') as f:\n",
|
||||
" f.write(bytes)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "bipolaroid",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
31958
src/pdf_transfer.ipynb
Normal file
31958
src/pdf_transfer.ipynb
Normal file
File diff suppressed because one or more lines are too long
40783
src/show_histograms.ipynb
Normal file
40783
src/show_histograms.ipynb
Normal file
File diff suppressed because one or more lines are too long
111557
src/train.ipynb
Normal file
111557
src/train.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue