"""Persist Phase-3 output (the per-postcode fragments) so a crash in the later merge/write phases can resume in seconds instead of re-running the ~10-hour OA loop. Phase 3 turns OA boundaries + INSPIRE parcels + UPRN points into ~1.8M ``(postcode, geometry)`` fragments held only in memory. Everything after it (merge, simplify, GeoJSON write) is cheap but failure-prone -- a single degenerate postcode used to abort the whole run *after* those 10 hours. Caching the fragments to disk decouples the expensive computation from the fragile output stage. Fragments are stored as one parquet file with two columns: ``postcode`` (string) and ``wkb`` (binary Shapely WKB). Writes are atomic (temp file + ``os.replace``) so an interrupted write never leaves a cache that passes the freshness check. The cache is validated against its upstream inputs by mtime: if any input is newer than the cache it is treated as stale and ignored, mirroring make's own freshness logic. """ from __future__ import annotations import os from pathlib import Path import numpy as np import polars as pl import shapely from shapely.geometry.base import BaseGeometry Fragment = tuple[str, BaseGeometry] def _tmp_path(cache_path: Path) -> Path: return cache_path.parent / (cache_path.name + ".tmp") def fragments_cache_is_fresh( cache_path: Path, inputs: list[Path | None] ) -> bool: """True if ``cache_path`` exists and is newer than every input that exists. A missing input is ignored (it cannot have changed the cache); a ``None`` input is skipped. Any existing input newer than the cache marks it stale. """ if not cache_path.exists(): return False cache_mtime = cache_path.stat().st_mtime for inp in inputs: if inp is None: continue path = Path(inp) if path.exists() and path.stat().st_mtime > cache_mtime: return False return True def save_fragments(cache_path: Path, fragments: list[Fragment]) -> None: """Atomically write ``(postcode, geometry)`` fragments to a parquet cache.""" postcodes = [pc for pc, _ in fragments] geoms = np.array([geom for _, geom in fragments], dtype=object) wkb = shapely.to_wkb(geoms) frame = pl.DataFrame( {"postcode": postcodes, "wkb": list(wkb)}, schema={"postcode": pl.Utf8, "wkb": pl.Binary}, ) cache_path.parent.mkdir(parents=True, exist_ok=True) tmp = _tmp_path(cache_path) frame.write_parquet(tmp, compression="zstd") os.replace(tmp, cache_path) def load_fragments(cache_path: Path) -> list[Fragment]: """Read fragments written by :func:`save_fragments` back into memory.""" frame = pl.read_parquet(cache_path) postcodes = frame["postcode"].to_list() geoms = shapely.from_wkb(frame["wkb"].to_list()) return list(zip(postcodes, geoms))