Improve cropping with rule of thirds

This commit is contained in:
Andras Schmelczer 2026-05-04 18:19:52 +01:00
parent 5890237449
commit cbee345d93
2 changed files with 64 additions and 47 deletions

View file

@ -1,25 +1,29 @@
"""Resize-to-cover then crop, biased toward Immich-detected face boxes."""
"""Resize-to-cover with face-aware positioning.
When a portrait source is cropped onto a landscape target, the face joint-span
centre lands on the top third of the crop window instead of the middle, so the
eyes sit on the upper-third line where landscape composition naturally reads.
"""
import math
from PIL import Image
# Face boxes end at the hairline; extend each box upward by this fraction of
# its own height so the fit-check considers the head, not just the face.
# its own height so the joint-span midpoint lands closer to the eyes than the
# bare face centre.
HEAD_EXTENSION = 0.4
def face_aware_crop(
image: Image.Image, target_w: int, target_h: int, faces: list[dict]
) -> Image.Image:
"""Resize to cover (target_w, target_h), then crop to keep faces in frame.
"""Resize to cover (target_w, target_h), then crop biased toward face boxes.
Each face dict has imageWidth/imageHeight (the coord-space dims) and
boundingBoxX1/Y1/X2/Y2. Per axis: if every (head-extended) face fits in
the crop we centre on the joint span so all faces are included with hair
clearance on top. If the span doesn't fit, we fall back to the
area-weighted centroid of the unextended boxes that biases toward the
biggest, presumably foreground, face. Plain center crop when no faces.
Joint-span midpoint of the head-extended boxes sets the crop centre. For
portrait sources rendered on a landscape target, the centre is placed at
the top third of the crop window (rule of thirds) instead of the middle.
Plain centre crop when no faces.
"""
img_w, img_h = image.size
img_aspect = img_w / img_h
@ -43,22 +47,13 @@ def face_aware_crop(
y1 = f["boundingBoxY1"] * sy
x2 = f["boundingBoxX2"] * sx
y2 = f["boundingBoxY2"] * sy
area = max(0.0, (x2 - x1) * (y2 - y1))
boxes.append((x1, y1, x2, y2, area))
x_lo = min(b[0] for b in boxes)
x_hi = max(b[2] for b in boxes)
cx = (x_lo + x_hi) / 2 if x_hi - x_lo <= target_w else _weighted_center(boxes, 0, 2)
boxes.append((x1, y1, x2, y2))
cx = (min(b[0] for b in boxes) + max(b[2] for b in boxes)) / 2
y_lo_ext = min(b[1] - (b[3] - b[1]) * HEAD_EXTENSION for b in boxes)
y_hi = max(b[3] for b in boxes)
cy = (y_lo_ext + y_hi) / 2 if y_hi - y_lo_ext <= target_h else _weighted_center(boxes, 1, 3)
cy = (y_lo_ext + y_hi) / 2
y_anchor = target_h / 3 if img_h > img_w and target_w > target_h else target_h / 2
x_off = max(0, min(int(cx - target_w / 2), new_w - target_w))
y_off = max(0, min(int(cy - target_h / 2), new_h - target_h))
y_off = max(0, min(int(cy - y_anchor), new_h - target_h))
return resized.crop((x_off, y_off, x_off + target_w, y_off + target_h))
def _weighted_center(boxes: list[tuple], lo: int, hi: int) -> float:
total = sum(b[4] for b in boxes) or 1.0
return sum((b[lo] + b[hi]) / 2 * b[4] for b in boxes) / total

View file

@ -13,6 +13,14 @@ from net import urlopen_with_retry
# JSON file located one level above this module's directory.
# NOTE(review): presumably the store behind _load_history/_save_history — confirm.
HISTORY_FILE = Path(__file__).parent.parent / "photo_history.json"
# Directory under the system temp dir; _cache_get looks up "<key>.json" files here.
CACHE_DIR = Path(tempfile.gettempdir()) / "frame_cache"

# Soft preference for picking photos whose orientation matches the frame.
# _bias_by_orientation uses these as random.choices weights when both a
# matching and a differing pool exist, so mismatched-orientation photos still
# appear, just less often. face_aware_crop anchors the face centre on the
# upper third only for portrait sources on a landscape target; every other
# combination is centred on the faces.
ORIENTATION_MATCH_WEIGHT = 0.8
ORIENTATION_DIFFER_WEIGHT = 0.2

# EXIF orientation values for which _is_portrait swaps the stored width and
# height before comparing them. Each value is listed both as an int and as a
# string. NOTE(review): presumably because the API may return either form — confirm.
_ROTATED_EXIF_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
def _cache_get(key: str) -> list[dict] | None:
path = CACHE_DIR / f"{key}.json"
@ -144,23 +152,36 @@ class ImmichClient:
return assets
_ROTATED_ORIENTATIONS = {5, 6, 7, 8, "5", "6", "7", "8"}
def _filter_by_orientation(assets: list[dict], portrait: bool) -> list[dict]:
"""Keep assets matching the requested orientation. Skips assets without EXIF dimensions."""
out = []
for a in assets:
exif = a.get("exifInfo") or {}
w = exif.get("exifImageWidth") or 0
h = exif.get("exifImageHeight") or 0
def _is_portrait(asset: dict) -> bool | None:
"""True if the asset's pixel orientation is portrait, None if EXIF dims are missing."""
exif = asset.get("exifInfo") or {}
w, h = exif.get("exifImageWidth") or 0, exif.get("exifImageHeight") or 0
if not (w and h):
continue
if exif.get("orientation") in _ROTATED_ORIENTATIONS:
return None
if exif.get("orientation") in _ROTATED_EXIF_ORIENTATIONS:
w, h = h, w
if (h > w) == portrait:
out.append(a)
return out
return h > w
def _bias_by_orientation(candidates: list[dict], frame_portrait: bool) -> list[dict]:
    """Return either the orientation-matching or the orientation-differing candidates.

    Each candidate is classified by comparing ``_is_portrait(asset)`` with
    ``frame_portrait``. If only one of the two groups has members, that group
    is returned as-is. Otherwise one group is drawn at random, weighted by
    ORIENTATION_MATCH_WEIGHT and ORIENTATION_DIFFER_WEIGHT, and the outcome
    is printed.
    """
    same: list[dict] = []
    other: list[dict] = []
    for asset in candidates:
        portrait = _is_portrait(asset)
        # An asset whose orientation is unknown (None) is grouped with the
        # matching ones so it is included rather than dropped.
        matches = portrait is None or portrait == frame_portrait
        (same if matches else other).append(asset)

    # With at most one non-empty group there is nothing to choose between.
    if not (same and other):
        return other if other else same

    groups = [same, other]
    weights = [ORIENTATION_MATCH_WEIGHT, ORIENTATION_DIFFER_WEIGHT]
    (picked,) = random.choices(groups, weights=weights)
    label = "differing" if picked is other else "matching"
    print(f"Orientation: {len(same)} matching, {len(other)} differing, picked {label}")
    return picked
def _on_this_day_candidates(assets: list[dict]) -> tuple[list[dict], bool]:
@ -223,21 +244,22 @@ def _pick_weighted_random(assets: list[dict]) -> dict:
def _pick_and_download(
client: ImmichClient, assets: list[dict], orientation: int, source_label: str
) -> tuple[Path, dict]:
portrait = orientation in (90, 270)
filtered = _filter_by_orientation(assets, portrait)
if not filtered:
raise ValueError(f"No {'portrait' if portrait else 'landscape'} photos in {source_label}")
if not assets:
raise ValueError(f"No photos in {source_label}")
displayed, created_at = _load_history()
candidates = [a for a in filtered if a.get("id") not in displayed]
candidates = [a for a in assets if a.get("id") not in displayed]
if not candidates:
print(f"All {len(filtered)} photos shown, picking from full list")
candidates = filtered
print(f"All {len(assets)} photos shown, picking from full list")
candidates = assets
else:
print(f"Photos: {len(candidates)} new / {len(filtered)} total")
print(f"Photos: {len(candidates)} new / {len(assets)} total")
candidates = _bias_by_orientation(candidates, orientation in (90, 270))
asset = _pick_weighted_random(candidates)
dest = Path(tempfile.gettempdir()) / "immich_photo.jpg"
with tempfile.NamedTemporaryFile(prefix="immich_photo_", suffix=".jpg", delete=False) as tmp:
dest = Path(tmp.name)
path = client.download_asset(asset["id"], dest)
displayed.add(asset["id"])
_save_history(displayed, created_at)