Improve cropping with rule of thirds
This commit is contained in:
parent
5890237449
commit
cbee345d93
2 changed files with 64 additions and 47 deletions
|
|
@@ -1,25 +1,29 @@
|
||||||
"""Resize-to-cover then crop, biased toward Immich-detected face boxes."""
|
"""Resize-to-cover with face-aware positioning.
|
||||||
|
|
||||||
|
When a portrait source is cropped onto a landscape target, the face joint-span
|
||||||
|
centre lands on the top third of the crop window instead of the middle, so the
|
||||||
|
eyes sit on the upper-third line where landscape composition naturally reads.
|
||||||
|
"""
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
# Face boxes end at the hairline; extend each box upward by this fraction of
|
# Face boxes end at the hairline; extend each box upward by this fraction of
|
||||||
# its own height so the fit-check considers the head, not just the face.
|
# its own height so the joint-span midpoint lands closer to the eyes than the
|
||||||
|
# bare face centre.
|
||||||
HEAD_EXTENSION = 0.4
|
HEAD_EXTENSION = 0.4
|
||||||
|
|
||||||
|
|
||||||
def face_aware_crop(
|
def face_aware_crop(
|
||||||
image: Image.Image, target_w: int, target_h: int, faces: list[dict]
|
image: Image.Image, target_w: int, target_h: int, faces: list[dict]
|
||||||
) -> Image.Image:
|
) -> Image.Image:
|
||||||
"""Resize to cover (target_w, target_h), then crop to keep faces in frame.
|
"""Resize to cover (target_w, target_h), then crop biased toward face boxes.
|
||||||
|
|
||||||
Each face dict has imageWidth/imageHeight (the coord-space dims) and
|
Joint-span midpoint of the head-extended boxes sets the crop centre. For
|
||||||
boundingBoxX1/Y1/X2/Y2. Per axis: if every (head-extended) face fits in
|
portrait sources rendered on a landscape target, the centre is placed at
|
||||||
the crop we centre on the joint span so all faces are included with hair
|
the top third of the crop window (rule of thirds) instead of the middle.
|
||||||
clearance on top. If the span doesn't fit, we fall back to the
|
Plain centre crop when no faces.
|
||||||
area-weighted centroid of the unextended boxes — that biases toward the
|
|
||||||
biggest, presumably foreground, face. Plain center crop when no faces.
|
|
||||||
"""
|
"""
|
||||||
img_w, img_h = image.size
|
img_w, img_h = image.size
|
||||||
img_aspect = img_w / img_h
|
img_aspect = img_w / img_h
|
||||||
|
|
@@ -43,22 +47,13 @@ def face_aware_crop(
|
||||||
y1 = f["boundingBoxY1"] * sy
|
y1 = f["boundingBoxY1"] * sy
|
||||||
x2 = f["boundingBoxX2"] * sx
|
x2 = f["boundingBoxX2"] * sx
|
||||||
y2 = f["boundingBoxY2"] * sy
|
y2 = f["boundingBoxY2"] * sy
|
||||||
area = max(0.0, (x2 - x1) * (y2 - y1))
|
boxes.append((x1, y1, x2, y2))
|
||||||
boxes.append((x1, y1, x2, y2, area))
|
cx = (min(b[0] for b in boxes) + max(b[2] for b in boxes)) / 2
|
||||||
|
|
||||||
x_lo = min(b[0] for b in boxes)
|
|
||||||
x_hi = max(b[2] for b in boxes)
|
|
||||||
cx = (x_lo + x_hi) / 2 if x_hi - x_lo <= target_w else _weighted_center(boxes, 0, 2)
|
|
||||||
|
|
||||||
y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes)
|
y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes)
|
||||||
y_hi = max(b[3] for b in boxes)
|
y_hi = max(b[3] for b in boxes)
|
||||||
cy = (y_lo_ext + y_hi) / 2 if y_hi - y_lo_ext <= target_h else _weighted_center(boxes, 1, 3)
|
cy = (y_lo_ext + y_hi) / 2
|
||||||
|
|
||||||
|
y_anchor = target_h / 3 if img_h > img_w and target_w > target_h else target_h / 2
|
||||||
x_off = max(0, min(int(cx - target_w / 2), new_w - target_w))
|
x_off = max(0, min(int(cx - target_w / 2), new_w - target_w))
|
||||||
y_off = max(0, min(int(cy - target_h / 2), new_h - target_h))
|
y_off = max(0, min(int(cy - y_anchor), new_h - target_h))
|
||||||
return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h))
|
return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h))
|
||||||
|
|
||||||
|
|
||||||
def _weighted_center(boxes: list[tuple], lo: int, hi: int) -> float:
|
|
||||||
total = sum(b[4] for b in boxes) or 1.0
|
|
||||||
return sum((b[lo] + b[hi]) / 2 * b[4] for b in boxes) / total
|
|
||||||
|
|
|
||||||
|
|
@@ -13,6 +13,14 @@ from net import urlopen_with_retry
|
||||||
HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json"
|
HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json"
|
||||||
CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache"
|
CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache"
|
||||||
|
|
||||||
|
# Soft preference for picking photos whose orientation matches the frame.
|
||||||
|
# Mismatched-orientation photos still appear, just less often, since
|
||||||
|
# face_aware_crop handles them via the rule-of-thirds composition.
|
||||||
|
ORIENTATION_MATCH_WEIGHT = 0.8
|
||||||
|
ORIENTATION_DIFFER_WEIGHT = 0.2
|
||||||
|
|
||||||
|
_ROTATED_EXIF_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
|
||||||
|
|
||||||
|
|
||||||
def _cache_get(key: str) -> list[dict] | None:
|
def _cache_get(key: str) -> list[dict] | None:
|
||||||
path = CACHE_DIR / f"{key}.json"
|
path = CACHE_DIR / f"{key}.json"
|
||||||
|
|
@@ -144,23 +152,36 @@ class ImmichClient:
|
||||||
return assets
|
return assets
|
||||||
|
|
||||||
|
|
||||||
_ROTATED_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
|
def _is_portrait(asset: dict) -> bool | None:
|
||||||
|
"""True if the asset's pixel orientation is portrait, None if EXIF dims are missing."""
|
||||||
|
exif = asset.get("exifInfo") or {}
|
||||||
|
w, h = exif.get("exifImageWidth") or 0, exif.get("exifImageHeight") or 0
|
||||||
|
if not (w and h):
|
||||||
|
return None
|
||||||
|
if exif.get("orientation") in _ROTATED_EXIF_ORIENTATIONS:
|
||||||
|
w, h = h, w
|
||||||
|
return h > w
|
||||||
|
|
||||||
|
|
||||||
def _filter_by_orientation(assets: list[dict], portrait: bool) -> list[dict]:
|
def _bias_by_orientation(candidates: list[dict], frame_portrait: bool) -> list[dict]:
|
||||||
"""Keep assets matching the requested orientation. Skips assets without EXIF dimensions."""
|
"""Pick the matching or differing-orientation pool per the configured weights."""
|
||||||
out = []
|
matching, differing = [], []
|
||||||
for a in assets:
|
for a in candidates:
|
||||||
exif = a.get("exifInfo") or {}
|
is_p = _is_portrait(a)
|
||||||
w = exif.get("exifImageWidth") or 0
|
# Unknown orientation defaults to "matching" — better to include than to drop.
|
||||||
h = exif.get("exifImageHeight") or 0
|
if is_p is None or is_p == frame_portrait:
|
||||||
if not (w and h):
|
matching.append(a)
|
||||||
continue
|
else:
|
||||||
if exif.get("orientation") in _ROTATED_ORIENTATIONS:
|
differing.append(a)
|
||||||
w, h = h, w
|
if not differing:
|
||||||
if (h > w) == portrait:
|
return matching
|
||||||
out.append(a)
|
if not matching:
|
||||||
return out
|
return differing
|
||||||
|
pools = [(matching, ORIENTATION_MATCH_WEIGHT), (differing, ORIENTATION_DIFFER_WEIGHT)]
|
||||||
|
pool, _ = random.choices(pools, weights=[w for _, w in pools])[0]
|
||||||
|
chosen = "matching" if pool is matching else "differing"
|
||||||
|
print(f"Orientation: {len(matching)} matching, {len(differing)} differing, picked {chosen}")
|
||||||
|
return pool
|
||||||
|
|
||||||
|
|
||||||
def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]:
|
def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]:
|
||||||
|
|
@@ -223,21 +244,22 @@ def _pick_weighted_random(assets: list[dict]) -> dict:
|
||||||
def _pick_and_download(
|
def _pick_and_download(
|
||||||
client: ImmichClient, assets: list[dict], orientation: int, source_label: str
|
client: ImmichClient, assets: list[dict], orientation: int, source_label: str
|
||||||
) -> tuple[Path, dict]:
|
) -> tuple[Path, dict]:
|
||||||
portrait = orientation in (90, 270)
|
if not assets:
|
||||||
filtered = _filter_by_orientation(assets, portrait)
|
raise ValueError(f"No photos in {source_label}")
|
||||||
if not filtered:
|
|
||||||
raise ValueError(f"No {'portrait' if portrait else 'landscape'} photos in {source_label}")
|
|
||||||
|
|
||||||
displayed, created_at = _load_history()
|
displayed, created_at = _load_history()
|
||||||
candidates = [a for a in filtered if a.get("id") not in displayed]
|
candidates = [a for a in assets if a.get("id") not in displayed]
|
||||||
if not candidates:
|
if not candidates:
|
||||||
print(f"All {len(filtered)} photos shown, picking from full list")
|
print(f"All {len(assets)} photos shown, picking from full list")
|
||||||
candidates = filtered
|
candidates = assets
|
||||||
else:
|
else:
|
||||||
print(f"Photos: {len(candidates)} new / {len(filtered)} total")
|
print(f"Photos: {len(candidates)} new / {len(assets)} total")
|
||||||
|
|
||||||
|
candidates = _bias_by_orientation(candidates, orientation in (90, 270))
|
||||||
|
|
||||||
asset = _pick_weighted_random(candidates)
|
asset = _pick_weighted_random(candidates)
|
||||||
dest = Path(tempfile.gettempdir()) / "immich_photo.jpg"
|
with tempfile.NamedTemporaryFile(prefix="immich_photo_", suffix=".jpg", delete=False) as tmp:
|
||||||
|
dest = Path(tmp.name)
|
||||||
path = client.download_asset(asset["id"], dest)
|
path = client.download_asset(asset["id"], dest)
|
||||||
displayed.add(asset["id"])
|
displayed.add(asset["id"])
|
||||||
_save_history(displayed, created_at)
|
_save_history(displayed, created_at)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue