Improve cropping with rule of thirds

This commit is contained in:
Andras Schmelczer 2026-05-04 18:19:52 +01:00
parent 5890237449
commit cbee345d93
2 changed files with 64 additions and 47 deletions

View file

@ -1,25 +1,29 @@
"""Resize-to-cover then crop, biased toward Immich-detected face boxes.""" """Resize-to-cover with face-aware positioning.
When a portrait source is cropped onto a landscape target, the face joint-span
centre lands on the top third of the crop window instead of the middle, so the
eyes sit on the upper-third line where landscape composition naturally reads.
"""
import math import math
from PIL import Image from PIL import Image
# Face boxes end at the hairline; extend each box upward by this fraction of # Face boxes end at the hairline; extend each box upward by this fraction of
# its own height so the fit-check considers the head, not just the face. # its own height so the joint-span midpoint lands closer to the eyes than the
# bare face centre.
HEAD_EXTENSION = 0.4 HEAD_EXTENSION = 0.4
def face_aware_crop( def face_aware_crop(
image: Image.Image, target_w: int, target_h: int, faces: list[dict] image: Image.Image, target_w: int, target_h: int, faces: list[dict]
) -> Image.Image: ) -> Image.Image:
"""Resize to cover (target_w, target_h), then crop to keep faces in frame. """Resize to cover (target_w, target_h), then crop biased toward face boxes.
Each face dict has imageWidth/imageHeight (the coord-space dims) and Joint-span midpoint of the head-extended boxes sets the crop centre. For
boundingBoxX1/Y1/X2/Y2. Per axis: if every (head-extended) face fits in portrait sources rendered on a landscape target, the centre is placed at
the crop we centre on the joint span so all faces are included with hair the top third of the crop window (rule of thirds) instead of the middle.
clearance on top. If the span doesn't fit, we fall back to the Plain centre crop when no faces.
area-weighted centroid of the unextended boxes that biases toward the
biggest, presumably foreground, face. Plain center crop when no faces.
""" """
img_w, img_h = image.size img_w, img_h = image.size
img_aspect = img_w / img_h img_aspect = img_w / img_h
@ -43,22 +47,13 @@ def face_aware_crop(
y1 = f["boundingBoxY1"] * sy y1 = f["boundingBoxY1"] * sy
x2 = f["boundingBoxX2"] * sx x2 = f["boundingBoxX2"] * sx
y2 = f["boundingBoxY2"] * sy y2 = f["boundingBoxY2"] * sy
area = max(0.0, (x2 - x1) * (y2 - y1)) boxes.append((x1, y1, x2, y2))
boxes.append((x1, y1, x2, y2, area)) cx = (min(b[0] for b in boxes) + max(b[2] for b in boxes)) / 2
x_lo = min(b[0] for b in boxes)
x_hi = max(b[2] for b in boxes)
cx = (x_lo + x_hi) / 2 if x_hi - x_lo <= target_w else _weighted_center(boxes, 0, 2)
y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes) y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes)
y_hi = max(b[3] for b in boxes) y_hi = max(b[3] for b in boxes)
cy = (y_lo_ext + y_hi) / 2 if y_hi - y_lo_ext <= target_h else _weighted_center(boxes, 1, 3) cy = (y_lo_ext + y_hi) / 2
y_anchor = target_h / 3 if img_h > img_w and target_w > target_h else target_h / 2
x_off = max(0, min(int(cx - target_w / 2), new_w - target_w)) x_off = max(0, min(int(cx - target_w / 2), new_w - target_w))
y_off = max(0, min(int(cy - target_h / 2), new_h - target_h)) y_off = max(0, min(int(cy - y_anchor), new_h - target_h))
return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h)) return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h))
def _weighted_center(boxes: list[tuple], lo: int, hi: int) -> float:
total = sum(b[4] for b in boxes) or 1.0
return sum((b[lo] + b[hi]) / 2 * b[4] for b in boxes) / total

View file

@ -13,6 +13,14 @@ from net import urlopen_with_retry
HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json" HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json"
CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache" CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache"
# Soft preference for picking photos whose orientation matches the frame.
# Mismatched-orientation photos still appear, just less often: face_aware_crop
# crops them to cover the frame, and applies the rule-of-thirds placement to
# portrait photos shown on a landscape frame.
ORIENTATION_MATCH_WEIGHT = 0.8
ORIENTATION_DIFFER_WEIGHT = 0.2
_ROTATED_EXIF_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
def _cache_get(key: str) -> list[dict] | None: def _cache_get(key: str) -> list[dict] | None:
path = CACHE_DIR / f"{key}.json" path = CACHE_DIR / f"{key}.json"
@ -144,23 +152,36 @@ class ImmichClient:
return assets return assets
_ROTATED_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"} def _is_portrait(asset: dict) -> bool | None:
"""True if the asset's pixel orientation is portrait, None if EXIF dims are missing."""
exif = asset.get("exifInfo") or {}
def _filter_by_orientation(assets: list[dict], portrait: bool) -> list[dict]: w, h = exif.get("exifImageWidth") or 0, exif.get("exifImageHeight") or 0
"""Keep assets matching the requested orientation. Skips assets without EXIF dimensions."""
out = []
for a in assets:
exif = a.get("exifInfo") or {}
w = exif.get("exifImageWidth") or 0
h = exif.get("exifImageHeight") or 0
if not (w and h): if not (w and h):
continue return None
if exif.get("orientation") in _ROTATED_ORIENTATIONS: if exif.get("orientation") in _ROTATED_EXIF_ORIENTATIONS:
w, h = h, w w, h = h, w
if (h > w) == portrait: return h > w
out.append(a)
return out
def _bias_by_orientation(candidates: list[dict], frame_portrait: bool) -> list[dict]:
    """Return either the orientation-matching or the differing pool of candidates.

    Each candidate is classified by comparing ``_is_portrait(asset)`` with
    ``frame_portrait``. Assets whose orientation is unknown (``None``) are
    counted as matching so they are kept rather than dropped.

    If one of the two pools is empty, the other is returned directly.
    Otherwise a single pool is drawn at random, weighted by
    ORIENTATION_MATCH_WEIGHT and ORIENTATION_DIFFER_WEIGHT, and the outcome
    is printed.
    """
    matching: list[dict] = []
    differing: list[dict] = []
    for asset in candidates:
        portrait = _is_portrait(asset)
        mismatched = portrait is not None and portrait != frame_portrait
        (differing if mismatched else matching).append(asset)

    # With only one non-empty pool there is nothing to weigh.
    if not differing:
        return matching
    if not matching:
        return differing

    weighted_pools = [
        (matching, ORIENTATION_MATCH_WEIGHT),
        (differing, ORIENTATION_DIFFER_WEIGHT),
    ]
    weights = [weight for _, weight in weighted_pools]
    selected, _ = random.choices(weighted_pools, weights=weights)[0]
    chosen = "matching" if selected is matching else "differing"
    print(f"Orientation: {len(matching)} matching, {len(differing)} differing, picked {chosen}")
    return selected
def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]: def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]:
@ -223,21 +244,22 @@ def _pick_weighted_random(assets: list[dict]) -> dict:
def _pick_and_download( def _pick_and_download(
client: ImmichClient, assets: list[dict], orientation: int, source_label: str client: ImmichClient, assets: list[dict], orientation: int, source_label: str
) -> tuple[Path, dict]: ) -> tuple[Path, dict]:
portrait = orientation in (90, 270) if not assets:
filtered = _filter_by_orientation(assets, portrait) raise ValueError(f"No photos in {source_label}")
if not filtered:
raise ValueError(f"No {'portrait' if portrait else 'landscape'} photos in {source_label}")
displayed, created_at = _load_history() displayed, created_at = _load_history()
candidates = [a for a in filtered if a.get("id") not in displayed] candidates = [a for a in assets if a.get("id") not in displayed]
if not candidates: if not candidates:
print(f"All {len(filtered)} photos shown, picking from full list") print(f"All {len(assets)} photos shown, picking from full list")
candidates = filtered candidates = assets
else: else:
print(f"Photos: {len(candidates)} new / {len(filtered)} total") print(f"Photos: {len(candidates)} new / {len(assets)} total")
candidates = _bias_by_orientation(candidates, orientation in (90, 270))
asset = _pick_weighted_random(candidates) asset = _pick_weighted_random(candidates)
dest = Path(tempfile.gettempdir()) / "immich_photo.jpg" with tempfile.NamedTemporaryFile(prefix="immich_photo_", suffix=".jpg", delete=False) as tmp:
dest = Path(tmp.name)
path = client.download_asset(asset["id"], dest) path = client.download_asset(asset["id"], dest)
displayed.add(asset["id"]) displayed.add(asset["id"])
_save_history(displayed, created_at) _save_history(displayed, created_at)