This commit is contained in:
Andras Schmelczer 2026-05-28 14:27:52 +01:00
parent d83691323f
commit f5f017b01f
14 changed files with 103 additions and 46 deletions

View file

@ -62,6 +62,8 @@ export default defineConfig({
],
image: {
service: { entrypoint: 'astro/assets/services/sharp' },
// SVG sources in src/content/**/_assets are author-controlled.
dangerouslyProcessSVG: true,
},
vite: {
server: {

View file

@ -158,7 +158,10 @@ async function checkPreviewCroppingStyles() {
for (const match of css.matchAll(blockPattern)) {
const selector = match[1].replace(/\/\*[\s\S]*?\*\//g, '').trim();
const body = match[2];
if (!selector || !/thumbnail|preview/i.test(selector)) continue;
// Only inspect rules that target elements explicitly opted in to the
// no-crop contract via [data-uncropped-preview]. Listing thumbnails
// that intentionally cover-crop don't carry this attribute.
if (!selector || !/\[data-uncropped-preview\b/.test(selector)) continue;
const targetsMedia = /\b(img|picture|video|canvas)\b/i.test(selector);
const objectFit = declarationValue(body, 'object-fit');

View file

@ -29,6 +29,12 @@ const failOnIssues =
process.argv.includes('--fail-on-issues') ||
process.env.ASTRO_AUDIT_FAIL_ON_ISSUES === '1';
// Audit codes that are dropped from the reported results.
//
// The above-fold / below-fold loading rules are heuristic: their verdict
// depends on the heights of the items rendered above an image, and those
// heights move whenever a post's description length changes. The resulting
// false positives can't be resolved with a single `eagerThumbnailCount` per
// list, so the audit suppresses both codes.
const IGNORED_AUDIT_CODES = new Set([
  'perf-use-loading-eager',
  'perf-use-loading-lazy',
]);
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
@ -307,12 +313,14 @@ async function auditRoute(context, baseUrl, route, viewport) {
throw new Error(`${route} at ${viewportLabel(viewport)}: ${audit.error}`);
}
return audit.results.map((result) => ({
route,
url: page.url(),
viewport: viewportLabel(viewport),
...result,
}));
return audit.results
.filter((result) => !IGNORED_AUDIT_CODES.has(result.code))
.map((result) => ({
route,
url: page.url(),
viewport: viewportLabel(viewport),
...result,
}));
} finally {
await page.close().catch(() => {});
}

View file

@ -41,8 +41,6 @@ const isDecorativeLink = Boolean(href) && decorative;
href={href}
tabindex={isDecorativeLink ? -1 : undefined}
aria-label={isDecorativeLink ? (ariaLabel ?? alt) : undefined}
data-uncropped-preview
data-preview-label={ariaLabel ?? alt}
>
<Picture
src={src}

View file

@ -21,12 +21,13 @@ const {
// The `essay` field is a `reference('posts')`, so when present it's always a
// `{ collection, id }` shape that `getEntry` resolves to a CollectionEntry.
// Drafts are skipped because their article page is not built.
const essayHrefs = new Map<string, string>();
for (const project of projects) {
const essay = project.data.essay;
if (!essay) continue;
const resolved = await getEntry(essay);
if (resolved) essayHrefs.set(project.id, articlePath(resolved));
if (resolved && !resolved.data.draft) essayHrefs.set(project.id, articlePath(resolved));
}
---

View file

@ -23,12 +23,12 @@ links:
**The short version:**
- One Alpine container, ~75 lines of Bash, that snapshots a BTRFS volume and pushes the snapshot to one or more [Borg](https://borgbackup.readthedocs.io/) repositories on a fixed interval. The snapshot is the only thing standing between "consistent backup" and "corrupt database in the archive."
- Multi-target via numeric env vars (`BORG_REPO_0`, `BORG_REPO_1`, ...). The wrapper iterates until the next index isn't set. No config format, no DSL the env file is the configuration.
- Multi-target via numeric env vars (`BORG_REPO_0`, `BORG_REPO_1`, ...). The wrapper iterates until the next index isn't set. No config format, no DSL; the env file is the configuration.
- Two years of self-hosting, multiple restored incidents, zero data loss I noticed.
## The problem the snapshot solves
I self-host several databases that are mid-write at every moment of the day. `tar | borg create` against the live volume is a race: a Postgres or SQLite file that's half-written when borg reads it goes into the archive in a state nothing on Earth can replay. The "right" answer is to coordinate a quiesce with every database a fan-out of `pg_dump`, SQLite `.backup`, Redis `BGSAVE`, and so on, all with retry, timeouts, and per-app credentials.
I self-host several databases that are mid-write at every moment of the day. `tar | borg create` against the live volume is a race: a Postgres or SQLite file that's half-written when borg reads it goes into the archive in a state nothing on Earth can replay. The "right" answer is to coordinate a quiesce with every database: a fan-out of `pg_dump`, SQLite `.backup`, Redis `BGSAVE`, and so on, all with retry, timeouts, and per-app credentials.
The cheaper answer, if you've put everything on one BTRFS volume, is `btrfs subvolume snapshot`. It returns instantly with a copy-on-write fork of the entire filesystem. Every file is now atomically consistent at exactly the same instant. Run borg against the snapshot, not against the live volume.
@ -59,7 +59,7 @@ BORG_REPO_1=/local-backup
There's also a no-index fallback (`BORG_REPO=...` with no number) for the single-target case. Same script, no extra config plane.
I keep coming back to this pattern for small-system orchestration. The env file *is* the data structure. There's no YAML parsing, no JSON schema, no config-validation layer between you and the variable that actually matters.
I keep coming back to this pattern for small-system orchestration. The env file _is_ the data structure. There's no YAML parsing, no JSON schema, no config-validation layer between you and the variable that actually matters.
## The scheduler is a sleep, not cron
@ -79,7 +79,7 @@ A comment in the file says it out loud: "Using a simple sleep loop to schedule b
Two subtleties worth naming:
- **First-boot grace period.** If `backup_completion_time.log` doesn't exist yet (fresh container, first backup still running), fall back to `container_start_time.log` so the container isn't reported unhealthy during the first scheduled run.
- **Partial success is not success.** In multi-target mode, the completion log is only written if *every* target succeeded. One repo failing means the healthcheck stays red even if the other two are fine. Stale-but-quiet was the failure mode I wanted to make impossible.
- **Partial success is not success.** In multi-target mode, the completion log is only written if _every_ target succeeded. One repo failing means the healthcheck stays red even if the other two are fine. Stale-but-quiet was the failure mode I wanted to make impossible.
## Smaller calls
@ -90,7 +90,7 @@ Two subtleties worth naming:
- **`--files-cache=ctime,size,inode`.** The default `mtime,size,inode` re-hashes files when their mtime changes; on BTRFS, ctime is the more honest signal of "this content actually changed."
- **`compression=zstd,12`.** The sweet spot for backup data on my hardware: substantially better than zlib, not so slow it dominates the run.
- **`borg compact --threshold=5 --cleanup-commits`.** Reclaims space from pruned archives whenever the segment-file fragmentation crosses 5%.
- **`IGNORE_GIT_UNTRACKED=true`.** Optional. Walks every `.git` dir under the snapshot, runs `git ls-files --others --exclude-standard`, and feeds the result into `--exclude-from`. Skips `target/`, `node_modules/`, build caches anything the repo already knows isn't worth keeping.
- **`IGNORE_GIT_UNTRACKED=true`.** Optional. Walks every `.git` dir under the snapshot, runs `git ls-files --others --exclude-standard`, and feeds the result into `--exclude-from`. Skips `target/`, `node_modules/`, build caches, and anything else the repo already knows isn't worth keeping.
- **`SYS_ADMIN` capability on the container.** Needed for `btrfs subvolume snapshot` and `delete` from inside the namespace. The narrower capability set didn't have a way through.
## What I'd change

View file

@ -18,9 +18,9 @@ links:
url: https://home.schmelczer.dev/git/andras/fizika
---
I needed it. In my last year of high school I was about to sit the *emelt szintű* (advanced-level) physics érettségi, and the practice material I could find online was either paywalled or scattered across PDFs that wouldn't tell you whether your answer was right. So one evening I started typing past exam questions into a JSON file. A few weeks later I had something resembling a study tool, and a few weeks after that I had 659 questions covering more than a decade of past papers.
I needed it. In my last year of high school I was about to sit the _emelt szintű_ (advanced-level) physics érettségi, and the practice material I could find online was either paywalled or scattered across PDFs that wouldn't tell you whether your answer was right. So one evening I started typing past exam questions into a JSON file. A few weeks later I had something resembling a study tool, and a few weeks after that I had 659 questions covering more than a decade of past papers.
The site is intentionally small. A static frontend on jQuery, four CSS files, a JSON blob of questions, a folder of scanned diagrams from the original papers. You pick a topic (*Mechanika, Hőtan, Elektromosság, Atomfizika*) or hunt down a specific year's exam, get a randomised quiz, answer, and the page colours each row green or red. Past results sit in `localStorage`, because the audience was high schoolers; account-less was the privacy answer.
The site is intentionally small. A static frontend on jQuery, four CSS files, a JSON blob of questions, a folder of scanned diagrams from the original papers. You pick a topic (_Mechanika, Hőtan, Elektromosság, Atomfizika_) or hunt down a specific year's exam, get a randomised quiz, answer, and the page colours each row green or red. Past results sit in `localStorage`, because the audience was high schoolers; account-less was the privacy answer.
It outgrew Firebase eventually. I moved the data to a small Express backend so I could keep editing questions without a paid plan, with a JSON file and an image folder as the storage layer. The admin routes have no auth; instead, the service stays off the public internet and I edit through an SSH-forwarded localhost. Fine for a one-person CMS, terrible advice for anything with multiple editors.

View file

@ -5,10 +5,19 @@ date: 2026-05-27
projectPeriod: '2026'
thumbnail:
src: ./_assets/frame.jpg
alt: The e-ink frame on the wall showing a dithered landscape photo with the capture age and EXIF location painted into the bottom corners.
alt: The e-ink frame on the wall showing a dithered landscape scene with the capture age and EXIF location painted into the bottom corners.
tags: ['embedded', 'systems', 'tools']
role: Frame builder and pipeline author
stack: ['Python', 'Raspberry Pi Zero 2W', 'Waveshare 7.3" 6-colour panel', 'Immich', 'Home Assistant', 'numba', 'Atkinson dither']
stack:
[
'Python',
'Raspberry Pi Zero 2W',
'Waveshare 7.3" 6-colour panel',
'Immich',
'Home Assistant',
'numba',
'Atkinson dither',
]
scale: One panel, one household, ~64 refreshes a day at peak
outcome: A wall-mounted photo frame that pulls from self-hosted Immich, gated on home presence, with no cloud dependencies
audience: general
@ -30,15 +39,15 @@ media:
## Why a stupid amount of engineering for a picture on a wall
That's the point. Albert Borgmann once distinguished *devices* — which efficiently deliver a commodity and disappear into the wall — from *focal things*, which gather a practice around them. A Nest Hub is a device; it shows you photos the way a microwave delivers heat. The frame is a focal thing. I curated the weights. I hung it where the light was right. I tweak it when something feels off. It doesn't sell my attention back to me; it asks me to pay some.
That's the point. Albert Borgmann once distinguished _devices_ (which efficiently deliver a commodity and disappear into the wall) from _focal things_, which gather a practice around them. A Nest Hub is a device; it shows you photos the way a microwave delivers heat. The frame is a focal thing. I curated the weights. I hung it where the light was right. I tweak it when something feels off. It doesn't sell my attention back to me; it asks me to pay some.
The medium helps. E-ink doesn't glow and doesn't beep. From across the room it reads as *image*, not as *screen* and that one perceptual difference changes how often I actually look at it.
The medium helps. E-ink doesn't glow and doesn't beep. From across the room it reads as _image_, not as _screen_, and that one perceptual difference changes how often I actually look at it.
## The presence gate
The cron line does most of the work. Every 15 minutes, the script checks the time of day, then asks Home Assistant whether anyone in `HA_PRESENCE` is home. If not, it quits. The panel keeps showing the last photo, because e-ink so you walk in to whatever was there when the house emptied.
The cron line does most of the work. Every 15 minutes, the script checks the time of day, then asks Home Assistant whether anyone in `HA_PRESENCE` is home. If not, it quits. The panel keeps showing the last photo, because e-ink, so you walk in to whatever was there when the house emptied.
The point isn't power saving. John Berger drew a line between photographs kept inside a context of lived meaning — private — and ones severed and circulated — public. Google Photos hands you the public mode dressed as the private. A wall in the hallway, lit only when your people are home, restores the context. The same photograph means something different surfacing while you're cooking dinner than it does in a feed at 11pm.
The point isn't power saving. John Berger drew a line between photographs kept inside a context of lived meaning (private), and ones severed and circulated (public). Google Photos hands you the public mode dressed as the private. A wall in the hallway, lit only when your people are home, restores the context. The same photograph means something different surfacing while you're cooking dinner than it does in a feed at 11pm.
## How a photo gets picked
@ -46,23 +55,23 @@ The pool is biased the way memory is biased: four buckets, weighted ~30% "on thi
A 7-day rolling history filters repeats. Before accepting a candidate, the picker runs `heads_fit_in_crop` against Immich's detected face boxes, extended upward to cover the skull and padded by `HEAD_SAFETY_MARGIN`: if the planned crop would slice into any visible head, that candidate is rejected and another is drawn. A wall photo with half a face in it is worse than the same photo not on the wall at all.
`face_aware_crop` does the actual cropping resize-cropping to fill the frame while biasing the window around detected faces. A landscape shot with room around the subject usually crops cleanly to portrait this way; the guardrail above catches the ones that don't.
`face_aware_crop` does the actual cropping: resize-cropping to fill the frame while biasing the window around detected faces. A landscape shot with room around the subject usually crops cleanly to portrait this way; the guardrail above catches the ones that don't.
## Tuning the pipeline somewhere else
Iterating on the Pi means waiting 12+ seconds per refresh. Both the face-aware crop and the dither were tuned in Jupyter against a local pool of a few hundred photos, then frozen and shipped.
The dither is where the choice visibly matters. The panel can only show black, white, red, yellow, blue, green no intensity control, every pixel is one of those six. I compared Floyd-Steinberg, Stucki, and a couple of ordered variants. Atkinson kept the highest perceived contrast on the 6-colour palette without smearing skin tones into the nearest yellow. Pure-Python Atkinson on the Pi Zero was unusably slow, so the inner loop runs through `numba` with perceptual-weighted nearest-colour matching (0.299/0.587/0.114). Roughly 100x faster after the JIT cache warms.
The dither is where the choice visibly matters. The panel can only show black, white, red, yellow, blue, green; no intensity control, every pixel is one of those six. I compared Floyd-Steinberg, Stucki, and a couple of ordered variants. Atkinson kept the highest perceived contrast on the 6-colour palette without smearing skin tones into the nearest yellow. Pure-Python Atkinson on the Pi Zero was unusably slow, so the inner loop runs through `numba` with perceptual-weighted nearest-colour matching (0.299/0.587/0.114). Roughly 100x faster after the JIT cache warms.
## The weekend-reimplementable rule
Hundred Rabbits, a couple who live offshore on a sailboat doing permacomputing in practice, hold themselves to a rule: any system they depend on should be reimplementable in a weekend. The frame meets the bar. A few hundred lines of stdlib Python on a documented panel, reading from an HTTP endpoint that returns JPEGs. It came together over an afternoon with Claude Code plus a couple of weekends tuning the picker and the dither; the repo is public partly as a reference for anyone wanting to do something similar. If Immich disappears tomorrow the selection logic is eighty lines I can repoint at whatever replaces it.
This stopped being hobbyist territory around 2024, when researchers found family-blog photos of Brazilian children inside the LAION training set. Self-hosting your photos used to be a preference; it's becoming a safeguarding decision. Don't ask whether the hassle is worth it now. Ask what state you'd be in if any one of your platforms went dark and notice that this isn't a hypothetical. Nixplay's cloud-tied frames have bricked. Funimation deleted libraries people had paid for. The parenthetical in *useless when the company closes its doors* does the whole argument's work.
This stopped being hobbyist territory around 2024, when researchers found family-blog photos of Brazilian children inside the LAION training set. Self-hosting your photos used to be a preference; it's becoming a safeguarding decision. Don't ask whether the hassle is worth it now. Ask what state you'd be in if any one of your platforms went dark, and notice that this isn't a hypothetical. Nixplay's cloud-tied frames have bricked. Funimation deleted libraries people had paid for. The parenthetical in _useless when the company closes its doors_ does the whole argument's work.
## Smaller calls
- **Capture age and EXIF location painted as text.** White on a black stroke, written *after* dithering, so the labels stay sharp on the 6-colour palette.
- **Capture age and EXIF location painted as text.** White on a black stroke, written _after_ dithering, so the labels stay sharp on the 6-colour palette.
- **CLI flags for the awkward photos.** `--album`, `--people`, `-o 90` (portrait), and `--saturation`/`--contrast`/`--gamma` are flags on the cron command. The defaults are tuned for the average photo; the flags exist for the few that aren't.
- **`flock` around the render.** A slow refresh can't overlap the next 15-minute tick.
- **Wifi power-save reconnect job.** The Pi Zero 2W's wifi drops if power-save kicks in. A separate `wifi-check.sh` every five minutes brings it back.

View file

@ -8,7 +8,20 @@ thumbnail:
alt: The Perfect Postcode dashboard with active filters on property type, price, transit time, and crime, showing a Manchester map with matching properties highlighted as a heatmap.
tags: ['systems', 'web', 'tools']
role: Server architect and operator
stack: ['Rust', 'Axum', 'Polars', 'h3o', 'rayon', 'PocketBase', 'PMTiles', 'MapLibre', 'deck.gl', 'Conveyal R5', 'Gemini']
stack:
[
'Rust',
'Axum',
'Polars',
'h3o',
'rayon',
'PocketBase',
'PMTiles',
'MapLibre',
'deck.gl',
'Conveyal R5',
'Gemini',
]
scale: ~25M historical properties, ~2.5M postcodes, ~150 numeric features per row, all in RAM on a single VM
outcome: A single-binary UK property-intelligence service with sub-100ms hexagon aggregations under filter
audience: technical
@ -25,26 +38,26 @@ media:
**The short version:**
- One Rust binary (Axum, Polars, h3o, rayon) holds the entire UK property history in RAM: ~25M historical transactions, ~150 numeric features per row, plus postcode features, POIs, places, sparse travel-time matrices, and PMTiles. The whole resident set fits inside a VM you can rent.
- The hot loop dictates the data layout. Every numeric feature is u16-quantised against a per-feature `(min, scale)`. Filter evaluation per row, per filter, is `raw != NAN_U16 && raw >= min_u16 && raw <= max_u16` three integer compares, no floats, no decoding.
- The hot loop dictates the data layout. Every numeric feature is u16-quantised against a per-feature `(min, scale)`. Filter evaluation per row, per filter, is `raw != NAN_U16 && raw >= min_u16 && raw <= max_u16`: three integer compares, no floats, no decoding.
- An H3 cell is precomputed per property at resolution 12. A CSR-laid-out 0.01°-cell grid handles bbox queries. Aggregation goes serial under 50,000 candidate rows and parallel above it.
## The constraint that shapes everything
The answer to *"what's the median price in this hexagon, filtered to four-bedroom terraces under £450k with a 35-minute transit to Manchester"* needs to come back inside a single map pan. Per visible cell, per request, every time the user moves anything. That's the work.
The answer to _"what's the median price in this hexagon, filtered to four-bedroom terraces under £450k with a 35-minute transit to Manchester"_ needs to come back inside a single map pan. Per visible cell, per request, every time the user moves anything. That's the work.
At the resolution we want, the inputs are roughly 25M historical transactions, each with around 150 numeric features (price, EPC, deprivation deciles, school catchment metrics, POI proximities, noise, crime, …). Naively f32 per cell, that's ~15 GB before you count anything else postcodes, POIs, places, tiles, travel times. The rest of the architecture is the consequence of insisting it all lives in one process on one rentable box.
At the resolution we want, the inputs are roughly 25M historical transactions, each with around 150 numeric features (price, EPC, deprivation deciles, school catchment metrics, POI proximities, noise, crime, …). Naively f32 per cell, that's ~15 GB before you count anything else: postcodes, POIs, places, tiles, travel times. The rest of the architecture is the consequence of insisting it all lives in one process on one rentable box.
## u16 quantisation in a row-major flat array
Every numeric feature is encoded as `((value - feature_min) / feature_range) * 65534`. Dequant is `raw * dequant_a + quant_min`. `u16::MAX` is reserved as `NAN_U16` — the explicit missing-value sentinel — so the live range is 65534, not 65535. Per feature we keep a `(min, scale, p1, p99)` tuple and a 100-bucket histogram for the UI sliders.
Every numeric feature is encoded as `((value - feature_min) / feature_range) * 65534`. Dequant is `raw * dequant_a + quant_min`. `u16::MAX` is reserved as `NAN_U16` (the explicit missing-value sentinel), so the live range is 65534, not 65535. Per feature we keep a `(min, scale, p1, p99)` tuple and a 100-bucket histogram for the UI sliders.
Storage is a single `Vec<u16>` laid out row-major: `feature_data[row * num_features + feat_idx]`. Sixteen features fit in one 64-byte cache line; a row scan stays in L1 for several rows at a time. With 25M rows × ~150 features × 2 bytes, the property matrix is around 7.5 GB comfortably inside a 16 GB instance once the rest of the data joins it.
Storage is a single `Vec<u16>` laid out row-major: `feature_data[row * num_features + feat_idx]`. Sixteen features fit in one 64-byte cache line; a row scan stays in L1 for several rows at a time. With 25M rows × ~150 features × 2 bytes, the property matrix is around 7.5 GB, comfortably inside a 16 GB instance once the rest of the data joins it.
The precision loss is real but bounded: 0.01 to 0.1% per feature on the data we have, below the noise floor of any downstream statistic. The win is that the hot loop never touches an `f32`.
## The hot loop is three integer compares
`ParsedFilter` carries `min_u16` and `max_u16` the user's bounds requantised against the same per-feature `(min, scale)` at parse time. The row test is literal:
`ParsedFilter` carries `min_u16` and `max_u16`: the user's bounds requantised against the same per-feature `(min, scale)` at parse time. The row test is literal:
```rust
let raw = feature_data[base + filter.feat_idx];
@ -62,15 +75,15 @@ Two small parse-time choices made this fast in practice:
Two indexes, used for different things.
A 0.01° (~1 km) regular grid in CSR layout a single flat `values: Vec<u32>` of row indices and an `offsets: Vec<u32>` of per-cell starts answers bbox queries. CSR avoids the 24-byte-per-cell `Vec` header you'd pay with `Vec<Vec<u32>>`, which is the difference between a few MB and a few hundred MB at UK scale. `for_each_in_bounds` is the variant that skips the result allocation; aggregators stream into it directly.
A 0.01° (~1 km) regular grid in CSR layout (a single flat `values: Vec<u32>` of row indices and an `offsets: Vec<u32>` of per-cell starts) answers bbox queries. CSR avoids the 24-byte-per-cell `Vec` header you'd pay with `Vec<Vec<u32>>`, which is the difference between a few MB and a few hundred MB at UK scale. `for_each_in_bounds` is the variant that skips the result allocation; aggregators stream into it directly.
An H3 cell at resolution 12 is precomputed per property at boot, stored as `Vec<u64>`. Lower-resolution cells are derived via `CellIndex::parent()` fast and exact. The hexagon endpoint thresholds at `PARALLEL_THRESHOLD = 50_000`: below, plain serial aggregation; above, `rayon::par_chunks()` with `chunk = max(1000, rows / num_threads)`. Below the threshold, rayon's per-chunk overhead dominates the work it's parallelising it's worse than the obvious thing. Above, the slope flips.
An H3 cell at resolution 12 is precomputed per property at boot, stored as `Vec<u64>`. Lower-resolution cells are derived via `CellIndex::parent()`, which is fast and exact. The hexagon endpoint thresholds at `PARALLEL_THRESHOLD = 50_000`: below, plain serial aggregation; above, `rayon::par_chunks()` with `chunk = max(1000, rows / num_threads)`. Below the threshold, rayon's per-chunk overhead dominates the work it's parallelising; it's worse than the obvious thing. Above, the slope flips.
A small per-thread `FxHashMap<u64, u64>` H3 cache inside each rayon chunk takes care of properties touched by multiple aggregations within the same chunk.
## State is an Arc-clone away
`AppState` is large and immutable after the boot-time loads. `SharedState = RwLock<Arc<AppState>>` wraps it; every handler does `shared.load_state()` a brief read lock, an `Arc::clone`, no further lock contention for the request.
`AppState` is large and immutable after the boot-time loads. `SharedState = RwLock<Arc<AppState>>` wraps it; every handler does `shared.load_state()`: a brief read lock, an `Arc::clone`, no further lock contention for the request.
The standard read-mostly pattern, but worth naming for one reason: it makes hot-reloading the parquet trivial later. Build a new `AppState` from disk, take the write lock, swap the `Arc`, drop the old one when the last in-flight request finishes. None of the handlers need to change.
@ -80,13 +93,13 @@ On top of that there's a per-endpoint `ConcurrencyLimitLayer::new(N)`. The expen
For mutations that need exclusion (subscription state transitions, redeem-invite races), there is no Redis. Instead, `acquire_pocketbase_lock` does an optimistic create against a `locks` collection. If create succeeds, we own it; if it fails on conflict, we fetch the existing lock, check `expires_at_unix`, and if it's expired we delete and retry. Owner ID is a 24-char random string so stale-lock detection doesn't rely on host identity or wall-clock skew.
Release is a `Drop` handler that spawns a tokio task to delete the record async cleanup keeps the synchronous drop path free of I/O. 100 ms retry, 10-second acquire deadline. Coarse, but correct, audit-loggable in PocketBase, and adds zero new infrastructure to operate.
Release is a `Drop` handler that spawns a tokio task to delete the record; async cleanup keeps the synchronous drop path free of I/O. 100 ms retry, 10-second acquire deadline. Coarse, but correct, audit-loggable in PocketBase, and adds zero new infrastructure to operate.
## Cost-capping the LLM endpoint
The AI filter parser is a Gemini call. Two structural choices made it cheap enough to leave on:
- **One system prompt, computed once.** `build_system_prompt(features, mode_destinations)` runs at boot. The feature catalogue, the enum of available travel modes, the few-shot examples all concatenated once into a `String` on `AppState`. Every request reuses the same bytes, which Gemini's input cache likes.
- **One system prompt, computed once.** `build_system_prompt(features, mode_destinations)` runs at boot. The feature catalogue, the enum of available travel modes, the few-shot examples: all concatenated once into a `String` on `AppState`. Every request reuses the same bytes, which Gemini's input cache likes.
- **A `search_destinations` tool with a closed enum of modes.** The LLM doesn't get to invent place slugs. It can call the function; the server slugifies and resolves against the loaded travel-time directory using a word-overlap matcher tolerant of `kings-cross` vs `King's Cross`.
On top: a per-week token budget (`AI_FILTERS_WEEKLY_TOKEN_LIMIT = 10_000_000`) and a 2,000-token output cap. The budget is the actual cost guarantee; the per-call cap is belt-and-braces.
@ -100,13 +113,13 @@ On top: a per-week token budget (`AI_FILTERS_WEEKLY_TOKEN_LIMIT = 10_000_000`) a
- **`spawn_blocking` for Polars I/O.** Parquet scans are CPU-bound. They block the tokio executor if you let them; they don't if you don't.
- **`Box<[T]>` instead of `Vec<T>` for aggregator accumulators.** No `capacity` field, 8 bytes saved per slot. At hundreds of hexagons × six features per request it adds up.
- **String interning, three times.** Postcodes (~2.5M unique from 25M rows) live in a `lasso::RodeoReader`; each row stores a `Spur` (~4 bytes). Address tokens are flattened into one buffer with per-row `(offset, length)` arrays. The same pattern for enum value strings.
- **Free-zone bbox check, not point check.** Unlicensed queries must have their *entire* bbox inside `FREE_ZONE_BOUNDS`. Point-in-zone would be convenient and wrong — it would let users pan to anywhere from a free-zone centre.
- **Free-zone bbox check, not point check.** Unlicensed queries must have their _entire_ bbox inside `FREE_ZONE_BOUNDS`. Point-in-zone would be convenient and wrong; it would let users pan to anywhere from a free-zone centre.
- **Share-link bounds are server-computed.** `bounds_from_view(lat, lon, zoom)` derives the bbox from a UK-aware longitude/latitude span (`half_lat = half_lon * 0.6`) and clamps it. Legacy short URLs without server-stored bounds grant nothing.
## What I'd change
- **Pin the allocator.** I rely on `malloc_trim` to keep RSS predictable. A jemalloc with explicit purge would behave better than glibc plus periodic trimming, especially under sustained load.
- **One bench for the hot loop.** I trust the structure but I have no number for *filter throughput per row per filter under typical load*. That number would tell me when the u16 trick stops being enough.
- **One bench for the hot loop.** I trust the structure but I have no number for _filter throughput per row per filter under typical load_. That number would tell me when the u16 trick stops being enough.
- **Move free-zone bounds to PocketBase.** `FREE_ZONE_BOUNDS` is a `const`. It's been right for the demo region for a year. The next time it changes I'll regret hardcoding it.
- **A typed query DSL instead of `;;`-separated strings.** The current filter wire format is `name:min:max;;name:val1|val2`. Cheap to parse, awful to evolve. A small JSON envelope would survive the next feature.

View file

@ -6,7 +6,16 @@ thumbnail:
alt: The e-ink frame on the wall showing a dithered landscape photo with the capture age and EXIF location painted into the bottom corners.
period: '2026'
sortDate: 2026-05-01
technologies: ['Python', 'Raspberry Pi Zero 2W', 'Waveshare PhotoPainter', 'Immich', 'Home Assistant', 'numba', 'Atkinson dither']
technologies:
[
'Python',
'Raspberry Pi Zero 2W',
'Waveshare PhotoPainter',
'Immich',
'Home Assistant',
'numba',
'Atkinson dither',
]
selected: true
essay: frame-eink-photo-display
links:

View file

@ -6,7 +6,20 @@ thumbnail:
alt: The Perfect Postcode dashboard with active filters on property type, price, transit time, and crime, showing a Manchester map with matching properties as a heatmap.
period: '2026'
sortDate: 2026-05-01
technologies: ['Rust', 'Axum', 'Polars', 'h3o', 'rayon', 'PocketBase', 'PMTiles', 'MapLibre', 'deck.gl', 'Conveyal R5', 'Gemini']
technologies:
[
'Rust',
'Axum',
'Polars',
'h3o',
'rayon',
'PocketBase',
'PMTiles',
'MapLibre',
'deck.gl',
'Conveyal R5',
'Gemini',
]
selected: true
essay: perfect-postcode-rust-property-server
links:

View file

@ -52,10 +52,10 @@ const hasCode = !!post.body && /(^|[^`])`[^`\n]+`|```/m.test(post.body);
const h2Headings = headings.filter((h) => h.depth === 2);
const showToc = h2Headings.length >= 3;
// Don't repeat the banner image at the end PostThumbnail already rendered it.
// Don't repeat the banner image at the end; PostThumbnail already rendered it.
const thumbnailSrc = post.data.thumbnail.src.src;
const trailingMedia = post.data.media.filter(
(item) => item.type === 'video' || item.src.src !== thumbnailSrc,
(item) => item.type === 'video' || item.src.src !== thumbnailSrc
);
const personId = absoluteUrl('/about/#person');

View file

@ -139,6 +139,7 @@ const personJsonLd = buildPersonJsonLd({
widths={startingPointThumbnail.widths}
sizes={startingPointThumbnail.sizes}
ariaLabel={`Open article: ${post.data.title}`}
loading="eager"
/>
<div class="starting-point__body">
<h3>

View file

@ -26,7 +26,7 @@ const personJsonLd = buildPersonJsonLd();
<section class="home-intro">
<p class="eyebrow">Engineering notes</p>
<h1>
<span class="home-intro-name">Andras Schmelczer</span> software engineer. Writeups
<span class="home-intro-name">Andras Schmelczer</span>, software engineer. Writeups
of finished projects, with the tradeoffs left in.
</h1>
<p>