Fix data pipelines once and for all

This commit is contained in:
Andras Schmelczer 2026-06-10 21:27:32 +01:00
parent 08560476c5
commit 4012e4e047
46 changed files with 4508 additions and 855 deletions

View file

@@ -260,6 +260,12 @@ def main() -> None:
)
args = parser.parse_args()
if args.greenspace and not args.greenspace.exists():
# Fail loudly and EARLY (before the ~10h Phases 1-3): silently skipping
# the subtraction is exactly how parks/lakes shipped inside postcode
# boundaries unnoticed.
raise SystemExit(f"--greenspace file not found: {args.greenspace}")
fragments_cache = args.output / "fragments_cache.parquet"
# Phase 3 depends only on these inputs; greenspace is applied later (Phase 4),
# so a greenspace change must not invalidate the fragment cache.
@@ -294,7 +300,7 @@ def main() -> None:
greenspace_tree = None
greenspace_geoms = None
if args.greenspace and args.greenspace.exists():
if args.greenspace:
from .greenspace import load_greenspace
print(f" Loading greenspace/water from {args.greenspace}...")