Improve cropping with rule of thirds

2026-05-04 18:19:52 +01:00 · 2026-05-04 18:19:52 +01:00 · cbee345d93
commit cbee345d93
parent 5890237449
2 changed files with 64 additions and 47 deletions
--- a/src/lib/crop.py
+++ b/src/lib/crop.py
@ -1,25 +1,29 @@
-"""Resize-to-cover then crop, biased toward Immich-detected face boxes."""
+"""Resize-to-cover with face-aware positioning.
+
+When a portrait source is cropped onto a landscape target, the face joint-span
+centre lands on the top third of the crop window instead of the middle, so the
+eyes sit on the upper-third line where landscape composition naturally reads.
+"""

 import math

 from PIL import Image

 # Face boxes end at the hairline; extend each box upward by this fraction of
-# its own height so the fit-check considers the head, not just the face.
+# its own height so the joint-span midpoint lands closer to the eyes than the
+# bare face centre.
 HEAD_EXTENSION = 0.4


 def face_aware_crop(
    image: Image.Image, target_w: int, target_h: int, faces: list[dict]
 ) -> Image.Image:
-    """Resize to cover (target_w, target_h), then crop to keep faces in frame.
+    """Resize to cover (target_w, target_h), then crop biased toward face boxes.

-    Each face dict has imageWidth/imageHeight (the coord-space dims) and
-    boundingBoxX1/Y1/X2/Y2. Per axis: if every (head-extended) face fits in
-    the crop we centre on the joint span so all faces are included with hair
-    clearance on top. If the span doesn't fit, we fall back to the
-    area-weighted centroid of the unextended boxes — that biases toward the
-    biggest, presumably foreground, face. Plain center crop when no faces.
+    Joint-span midpoint of the head-extended boxes sets the crop centre. For
+    portrait sources rendered on a landscape target, the centre is placed at
+    the top third of the crop window (rule of thirds) instead of the middle.
+    Plain centre crop when no faces. NOTE(review): the top-third anchor is not gated on faces.
    """
    img_w, img_h = image.size
    img_aspect = img_w / img_h
@ -43,22 +47,13 @@ def face_aware_crop(
            y1 = f["boundingBoxY1"] * sy
            x2 = f["boundingBoxX2"] * sx
            y2 = f["boundingBoxY2"] * sy
-            area = max(0.0, (x2 - x1) * (y2 - y1))
-            boxes.append((x1, y1, x2, y2, area))
-
-        x_lo = min(b[0] for b in boxes)
-        x_hi = max(b[2] for b in boxes)
-        cx = (x_lo + x_hi) / 2 if x_hi - x_lo <= target_w else _weighted_center(boxes, 0, 2)
-
+            boxes.append((x1, y1, x2, y2))
+        cx = (min(b[0] for b in boxes) + max(b[2] for b in boxes)) / 2
        y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes)
        y_hi = max(b[3] for b in boxes)
-        cy = (y_lo_ext + y_hi) / 2 if y_hi - y_lo_ext <= target_h else _weighted_center(boxes, 1, 3)
+        cy = (y_lo_ext + y_hi) / 2

+    y_anchor = target_h / 3 if img_h > img_w and target_w > target_h else target_h / 2
    x_off = max(0, min(int(cx - target_w / 2), new_w - target_w))
-    y_off = max(0, min(int(cy - target_h / 2), new_h - target_h))
+    y_off = max(0, min(int(cy - y_anchor), new_h - target_h))
    return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h))
-
-
-def _weighted_center(boxes: list[tuple], lo: int, hi: int) -> float:
-    total = sum(b[4] for b in boxes) or 1.0
-    return sum((b[lo] + b[hi]) / 2 * b[4] for b in boxes) / total
--- a/src/lib/immich.py
+++ b/src/lib/immich.py
@ -13,6 +13,14 @@ from net import urlopen_with_retry
 HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json"
 CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache"

+# Soft preference for picking photos whose orientation matches the frame.
+# Mismatched-orientation photos still appear, just less often, since
+# face_aware_crop handles them via the rule-of-thirds composition.
+ORIENTATION_MATCH_WEIGHT = 0.8
+ORIENTATION_DIFFER_WEIGHT = 0.2
+
+_ROTATED_EXIF_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
+

 def _cache_get(key: str) -> list[dict] | None:
    path = CACHE_DIR / f"{key}.json"
@ -144,23 +152,36 @@ class ImmichClient:
        return assets


-_ROTATED_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
-
-
-def _filter_by_orientation(assets: list[dict], portrait: bool) -> list[dict]:
-    """Keep assets matching the requested orientation. Skips assets without EXIF dimensions."""
-    out = []
-    for a in assets:
-        exif = a.get("exifInfo") or {}
-        w = exif.get("exifImageWidth") or 0
-        h = exif.get("exifImageHeight") or 0
+def _is_portrait(asset: dict) -> bool | None:
+    """True if the asset is portrait after EXIF rotation, None if EXIF dims are missing."""
+    exif = asset.get("exifInfo") or {}
+    w, h = exif.get("exifImageWidth") or 0, exif.get("exifImageHeight") or 0
    if not (w and h):
-            continue
-        if exif.get("orientation") in _ROTATED_ORIENTATIONS:
+        return None
+    if exif.get("orientation") in _ROTATED_EXIF_ORIENTATIONS:
        w, h = h, w
-        if (h > w) == portrait:
-            out.append(a)
-    return out
+    return h > w
+
+
+def _bias_by_orientation(candidates: list[dict], frame_portrait: bool) -> list[dict]:
+    """Pick the matching or differing-orientation pool per the configured weights."""
+    matching, differing = [], []
+    for a in candidates:
+        is_p = _is_portrait(a)
+        # Unknown orientation defaults to "matching" — better to include than to drop.
+        if is_p is None or is_p == frame_portrait:
+            matching.append(a)
+        else:
+            differing.append(a)
+    if not differing:
+        return matching
+    if not matching:
+        return differing
+    pools = [(matching, ORIENTATION_MATCH_WEIGHT), (differing, ORIENTATION_DIFFER_WEIGHT)]
+    pool, _ = random.choices(pools, weights=[w for _, w in pools])[0]
+    chosen = "matching" if pool is matching else "differing"
+    print(f"Orientation: {len(matching)} matching, {len(differing)} differing, picked {chosen}")
+    return pool


 def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]:
@ -223,21 +244,22 @@ def _pick_weighted_random(assets: list[dict]) -> dict:
 def _pick_and_download(
    client: ImmichClient, assets: list[dict], orientation: int, source_label: str
 ) -> tuple[Path, dict]:
-    portrait = orientation in (90, 270)
-    filtered = _filter_by_orientation(assets, portrait)
-    if not filtered:
-        raise ValueError(f"No {'portrait' if portrait else 'landscape'} photos in {source_label}")
+    if not assets:
+        raise ValueError(f"No photos in {source_label}")

    displayed, created_at = _load_history()
-    candidates = [a for a in filtered if a.get("id") not in displayed]
+    candidates = [a for a in assets if a.get("id") not in displayed]
    if not candidates:
-        print(f"All {len(filtered)} photos shown, picking from full list")
-        candidates = filtered
+        print(f"All {len(assets)} photos shown, picking from full list")
+        candidates = assets
    else:
-        print(f"Photos: {len(candidates)} new / {len(filtered)} total")
+        print(f"Photos: {len(candidates)} new / {len(assets)} total")
+
+    candidates = _bias_by_orientation(candidates, orientation in (90, 270))

    asset = _pick_weighted_random(candidates)
-    dest = Path(tempfile.gettempdir()) / "immich_photo.jpg"
+    with tempfile.NamedTemporaryFile(prefix="immich_photo_", suffix=".jpg", delete=False) as tmp:
+        dest = Path(tmp.name)
    path = client.download_asset(asset["id"], dest)
    displayed.add(asset["id"])
    _save_history(displayed, created_at)