This commit is contained in:
Andras Schmelczer 2026-05-03 10:39:31 +01:00
parent 9a009f0b4c
commit eed1567f7f
12 changed files with 463 additions and 243 deletions

View file

@ -2,6 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"# Face-aware crop vs. centre crop\n",
@ -25,29 +26,42 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "acae54e37e7d407bbb7b55eff062a284",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '.')\n",
"from _helpers import bootstrap, immich_client, fetch_pool, download_image, silenced, show_grid, CACHE_DIR\n",
"\n",
"sys.path.insert(0, \".\")\n",
"from _helpers import (\n",
" CACHE_DIR,\n",
" bootstrap,\n",
" download_image,\n",
" fetch_pool,\n",
" immich_client,\n",
" show_grid,\n",
" silenced,\n",
")\n",
"\n",
"bootstrap()\n",
"\n",
"import json\n",
"import math\n",
"import numpy as np\n",
"from PIL import Image, ImageDraw\n",
"from waveshare_epd.epd7in3e import EPD_WIDTH, EPD_HEIGHT, _crop_center\n",
"from crop import face_aware_crop, HEAD_EXTENSION\n",
"\n",
"POOL_SIZE = 80 # smaller than auto-tune notebook — each photo costs an extra API call for faces\n",
"import numpy as np\n",
"from crop import HEAD_EXTENSION, face_aware_crop\n",
"from PIL import Image, ImageDraw\n",
"from waveshare_epd.epd7in3e import EPD_HEIGHT, EPD_WIDTH, _crop_center\n",
"\n",
"POOL_SIZE = 80 # smaller than auto-tune notebook — each photo costs an extra API call for faces\n",
"SEED = 7\n",
"N_PICKS = 4 # most-divergent picks (plus one no-faces baseline)"
"N_PICKS = 4 # most-divergent picks (plus one no-faces baseline)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9a63283cbaf04dbcab1f6479b197f3a8",
"metadata": {},
"outputs": [
{
@ -62,27 +76,28 @@
"source": [
"client = immich_client()\n",
"pool_assets = fetch_pool(client, pool_size=POOL_SIZE, seed=SEED)\n",
"print(f'pool size: {len(pool_assets)} landscape photos')\n",
"print(f\"pool size: {len(pool_assets)} landscape photos\")\n",
"\n",
"# Cache the face lookups — each is a separate /assets/{id} API call.\n",
"face_cache_path = CACHE_DIR / 'faces.json'\n",
"face_cache_path = CACHE_DIR / \"faces.json\"\n",
"CACHE_DIR.mkdir(exist_ok=True)\n",
"face_cache = json.loads(face_cache_path.read_text()) if face_cache_path.exists() else {}\n",
"\n",
"fetched_now = 0\n",
"for asset in pool_assets:\n",
" if asset['id'] in face_cache:\n",
" if asset[\"id\"] in face_cache:\n",
" continue\n",
" face_cache[asset['id']] = client.get_asset_faces(asset['id'])\n",
" face_cache[asset[\"id\"]] = client.get_asset_faces(asset[\"id\"])\n",
" fetched_now += 1\n",
"if fetched_now:\n",
" face_cache_path.write_text(json.dumps(face_cache))\n",
"print(f'face data: {fetched_now} fetched, {len(pool_assets) - fetched_now} cached')"
"print(f\"face data: {fetched_now} fetched, {len(pool_assets) - fetched_now} cached\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8dd0d8092fe74a7c96281538738b07e2",
"metadata": {},
"outputs": [
{
@ -111,10 +126,12 @@
" if faces:\n",
" boxes = []\n",
" for f in faces:\n",
" sx = new_w / (f.get('imageWidth') or img_w)\n",
" sy = new_h / (f.get('imageHeight') or img_h)\n",
" x1 = f['boundingBoxX1'] * sx; y1 = f['boundingBoxY1'] * sy\n",
" x2 = f['boundingBoxX2'] * sx; y2 = f['boundingBoxY2'] * sy\n",
" sx = new_w / (f.get(\"imageWidth\") or img_w)\n",
" sy = new_h / (f.get(\"imageHeight\") or img_h)\n",
" x1 = f[\"boundingBoxX1\"] * sx\n",
" y1 = f[\"boundingBoxY1\"] * sy\n",
" x2 = f[\"boundingBoxX2\"] * sx\n",
" y2 = f[\"boundingBoxY2\"] * sy\n",
" boxes.append((x1, y1, x2, y2, max(0.0, (x2 - x1) * (y2 - y1))))\n",
" x_lo, x_hi = min(b[0] for b in boxes), max(b[2] for b in boxes)\n",
" if x_hi - x_lo <= target_w:\n",
@ -129,39 +146,53 @@
" else:\n",
" total = sum(b[4] for b in boxes) or 1.0\n",
" cy = sum((b[1] + b[3]) / 2 * b[4] for b in boxes) / total\n",
" face_off = (max(0, min(int(cx - target_w / 2), new_w - target_w)),\n",
" max(0, min(int(cy - target_h / 2), new_h - target_h)))\n",
" face_off = (\n",
" max(0, min(int(cx - target_w / 2), new_w - target_w)),\n",
" max(0, min(int(cy - target_h / 2), new_h - target_h)),\n",
" )\n",
" return centre_off, face_off, (new_w, new_h), boxes if faces else []\n",
"\n",
"\n",
"stats = []\n",
"for asset in pool_assets:\n",
" exif = asset.get('exifInfo') or {}\n",
" iw, ih = exif.get('exifImageWidth') or 0, exif.get('exifImageHeight') or 0\n",
" if exif.get('orientation') in (6, 8, '6', '8'):\n",
" exif = asset.get(\"exifInfo\") or {}\n",
" iw, ih = exif.get(\"exifImageWidth\") or 0, exif.get(\"exifImageHeight\") or 0\n",
" if exif.get(\"orientation\") in (6, 8, \"6\", \"8\"):\n",
" iw, ih = ih, iw\n",
" if not (iw and ih):\n",
" continue\n",
" faces = face_cache.get(asset['id'], [])\n",
" faces = face_cache.get(asset[\"id\"], [])\n",
" centre_off, face_off, canvas, boxes = crop_offsets(iw, ih, EPD_WIDTH, EPD_HEIGHT, faces)\n",
" dx = face_off[0] - centre_off[0]\n",
" dy = face_off[1] - centre_off[1]\n",
" stats.append({\n",
" 'asset': asset, 'faces': faces, 'boxes': boxes,\n",
" 'centre_off': centre_off, 'face_off': face_off, 'canvas': canvas,\n",
" 'shift': math.hypot(dx, dy), 'dx': dx, 'dy': dy,\n",
" })\n",
" stats.append(\n",
" {\n",
" \"asset\": asset,\n",
" \"faces\": faces,\n",
" \"boxes\": boxes,\n",
" \"centre_off\": centre_off,\n",
" \"face_off\": face_off,\n",
" \"canvas\": canvas,\n",
" \"shift\": math.hypot(dx, dy),\n",
" \"dx\": dx,\n",
" \"dy\": dy,\n",
" }\n",
" )\n",
"\n",
"with_faces = [s for s in stats if s['faces']]\n",
"no_faces = [s for s in stats if not s['faces']]\n",
"print(f'{len(with_faces)} with faces, {len(no_faces)} without')\n",
"shifts = np.array([s['shift'] for s in with_faces])\n",
"with_faces = [s for s in stats if s[\"faces\"]]\n",
"no_faces = [s for s in stats if not s[\"faces\"]]\n",
"print(f\"{len(with_faces)} with faces, {len(no_faces)} without\")\n",
"shifts = np.array([s[\"shift\"] for s in with_faces])\n",
"if len(shifts):\n",
" print(f'crop shift (px): min={shifts.min():.0f} median={np.median(shifts):.0f} max={shifts.max():.0f}')"
" print(\n",
" f\"crop shift (px): min={shifts.min():.0f} median={np.median(shifts):.0f} max={shifts.max():.0f}\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "72eea5119410473aa328ad9291626812",
"metadata": {},
"outputs": [
{
@ -177,20 +208,23 @@
}
],
"source": [
"with_faces.sort(key=lambda s: s['shift'], reverse=True)\n",
"with_faces.sort(key=lambda s: s[\"shift\"], reverse=True)\n",
"picks = with_faces[:N_PICKS]\n",
"if no_faces:\n",
" picks.append(no_faces[0])\n",
"\n",
"print(f\"{'name':36s} {'faces':>5s} {'dx':>5s} {'dy':>5s} {'|shift|':>7s}\")\n",
"for s in picks:\n",
" name = (s['asset'].get('originalFileName') or s['asset']['id'])[:36]\n",
" print(f\"{name:36s} {len(s['faces']):>5d} {s['dx']:>+5.0f} {s['dy']:>+5.0f} {s['shift']:>7.0f}\")"
" name = (s[\"asset\"].get(\"originalFileName\") or s[\"asset\"][\"id\"])[:36]\n",
" print(\n",
" f\"{name:36s} {len(s['faces']):>5d} {s['dx']:>+5.0f} {s['dy']:>+5.0f} {s['shift']:>7.0f}\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8edb47106e1a46a883d545849b8ab81b",
"metadata": {},
"outputs": [
{
@ -227,15 +261,16 @@
" canvas = image.resize((new_w, new_h), Image.LANCZOS).copy()\n",
" draw = ImageDraw.Draw(canvas)\n",
" for f in faces:\n",
" sx = new_w / (f.get('imageWidth') or iw)\n",
" sy = new_h / (f.get('imageHeight') or ih)\n",
" x1, y1 = f['boundingBoxX1'] * sx, f['boundingBoxY1'] * sy\n",
" x2, y2 = f['boundingBoxX2'] * sx, f['boundingBoxY2'] * sy\n",
" sx = new_w / (f.get(\"imageWidth\") or iw)\n",
" sy = new_h / (f.get(\"imageHeight\") or ih)\n",
" x1, y1 = f[\"boundingBoxX1\"] * sx, f[\"boundingBoxY1\"] * sy\n",
" x2, y2 = f[\"boundingBoxX2\"] * sx, f[\"boundingBoxY2\"] * sy\n",
" head_y1 = y1 - (y2 - y1) * HEAD_EXTENSION\n",
" draw.rectangle([x1, head_y1, x2, y2], outline=(0, 220, 255), width=4)\n",
" draw.rectangle([x1, y1, x2, y2], outline=(255, 220, 0), width=2)\n",
" return canvas, (new_w, new_h)\n",
"\n",
"\n",
"def draw_crop_windows(canvas, centre_off, face_off, target=(EPD_WIDTH, EPD_HEIGHT)):\n",
" out = canvas.copy()\n",
" draw = ImageDraw.Draw(out)\n",
@ -245,28 +280,32 @@
" draw.rectangle([fx, fy, fx + target[0], fy + target[1]], outline=(0, 200, 0), width=4)\n",
" return out\n",
"\n",
"\n",
"rows, titles = [], []\n",
"for s in picks:\n",
" img = download_image(client, s['asset'])\n",
" canvas, _ = draw_boxes_on_canvas(img, s['faces'])\n",
" windowed = draw_crop_windows(canvas, s['centre_off'], s['face_off'])\n",
" img = download_image(client, s[\"asset\"])\n",
" canvas, _ = draw_boxes_on_canvas(img, s[\"faces\"])\n",
" windowed = draw_crop_windows(canvas, s[\"centre_off\"], s[\"face_off\"])\n",
" centre = _crop_center(img, EPD_WIDTH, EPD_HEIGHT)\n",
" smart = face_aware_crop(img, EPD_WIDTH, EPD_HEIGHT, s['faces'])\n",
" name = (s['asset'].get('originalFileName') or s['asset']['id'])[:24]\n",
" smart = face_aware_crop(img, EPD_WIDTH, EPD_HEIGHT, s[\"faces\"])\n",
" name = (s[\"asset\"].get(\"originalFileName\") or s[\"asset\"][\"id\"])[:24]\n",
" rows.append([canvas, centre, smart, windowed])\n",
" titles.append([\n",
" f'{name}\\n{len(s[\"faces\"])} face{\"\" if len(s[\"faces\"]) == 1 else \"s\"}',\n",
" 'centre crop',\n",
" f'face-aware crop\\nshift: ({s[\"dx\"]:+.0f}, {s[\"dy\"]:+.0f}) px',\n",
" 'crop windows on canvas\\norange = centre, green = face-aware',\n",
" ])\n",
" titles.append(\n",
" [\n",
" f\"{name}\\n{len(s['faces'])} face{'' if len(s['faces']) == 1 else 's'}\",\n",
" \"centre crop\",\n",
" f\"face-aware crop\\nshift: ({s['dx']:+.0f}, {s['dy']:+.0f}) px\",\n",
" \"crop windows on canvas\\norange = centre, green = face-aware\",\n",
" ]\n",
" )\n",
"\n",
"with silenced():\n",
" show_grid(rows, titles, figsize_scale=(5.0, 3.2));"
" show_grid(rows, titles, figsize_scale=(5.0, 3.2))"
]
},
{
"cell_type": "markdown",
"id": "10185d26023b46108eb7d9f57d49d2b3",
"metadata": {},
"source": [
"## Reading the comparison\n",