#!/bin/bash
set -euo pipefail

# Batch-compute travel times from all places to all England postcodes
# for all transport modes (car, bicycle, walking, transit).
#
# Uses full England OSM + 2 GTFS feeds (BODS buses, National Rail).
# R5's TransportNetwork.fromDirectory() picks up all .osm.pbf and .zip files.
#
# Uses each place as origin with all postcodes as destinations — R5 does one
# routing computation per place, then reads off travel times to all postcodes.
# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
#
# Output: property-data/travel-times/{mode}/
#   - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
#   - postcodes_ref.parquet: postcode order reference
#   - places_ref.parquet: place order reference
#
# Usage:
#   ./r5-java/run.sh [--threads N] [--heap SIZE] [--network-dir DIR]
#                    [--output-dir DIR] [--paths] [--demo]
#     --threads N        worker threads passed to the Java app (default: 8)
#     --heap SIZE        JVM heap, used for both -Xms and -Xmx (default: 40g)
#     --network-dir DIR  network cache / build directory
#                        (default: property-data/r5-network)
#     --output-dir DIR   output base directory
#                        (default: property-data/travel-times)
#     --paths            records journey instructions (transit only, ~20x slower)
#     --demo             only compute Bank + TCR, transit only (quick test)

# --- Helpers ---

# Print a message to stderr and abort with exit status 1.
die() {
  echo "$*" >&2
  exit 1
}

# Abort unless the option in $1 is followed by a value in $2.
# Call as: require_value "$@"
require_value() {
  if [[ $# -lt 2 ]]; then
    die "Error: $1 requires a value"
  fi
}

# Download URL $1 to file $2. The transfer goes to "$2.part" first and is
# renamed only on success, so an interrupted download never leaves a truncated
# file that later runs would mistake for a complete one.
fetch() {
  local url=$1
  local dest=$2
  local partial="$dest.part"
  curl -fL -o "$partial" "$url"
  mv -- "$partial" "$dest"
}

# --- Defaults ---
THREADS=8
HEAP=40g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java
PATHS_FLAG=""
DEMO_FLAG=""

# --- Parse args ---
while [[ $# -gt 0 ]]; do
  case "$1" in
    --threads)
      require_value "$@"
      THREADS="$2"
      shift 2
      ;;
    --heap)
      require_value "$@"
      HEAP="$2"
      shift 2
      ;;
    --network-dir)
      require_value "$@"
      NETWORK_DIR="$2"
      shift 2
      ;;
    --output-dir)
      require_value "$@"
      OUTPUT_BASE="$2"
      shift 2
      ;;
    --paths)
      PATHS_FLAG="--paths"
      shift
      ;;
    --demo)
      DEMO_FLAG="--demo"
      shift
      ;;
    *)
      die "Unknown: $1"
      ;;
  esac
done

# --- Verify we're in project root ---
if [[ ! -f property-data/places.parquet || ! -f property-data/arcgis_data.parquet ]]; then
  die "Error: run from the property-map project root"
fi

echo "=== R5 Batch Travel Times ==="
echo "Threads: $THREADS | Heap: $HEAP"
echo ""

# --- Step 1: Download JDK if needed ---
JDK_DIR="$R5_DIR/jdk"
# Test for the java binary rather than the directory: a directory left behind
# by an interrupted download must not count as an installed JDK.
if [[ ! -x "$JDK_DIR/bin/java" ]]; then
  echo "--- Downloading JDK 21 ---"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64 | amd64) JDK_ARCH="x64" ;;
    aarch64 | arm64) JDK_ARCH="aarch64" ;;
    *) die "Unsupported architecture: $ARCH" ;;
  esac
  # The URL requests a Linux build regardless of the host OS.
  JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"

  # Extract into a staging directory and move it into place only after the
  # whole pipeline succeeded (pipefail makes a curl failure abort the script).
  JDK_STAGING="$JDK_DIR.partial"
  rm -rf -- "${JDK_STAGING:?}"
  mkdir -p "$JDK_STAGING"
  curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_STAGING"
  rm -rf -- "${JDK_DIR:?}"
  mv -- "$JDK_STAGING" "$JDK_DIR"
fi
export JAVA_HOME="$JDK_DIR"
export PATH="$JAVA_HOME/bin:$PATH"

# --- Step 2: Download library JARs ---
LIB_DIR="$R5_DIR/lib"
mkdir -p "$LIB_DIR"

R5_JAR="$LIB_DIR/r5.jar"
DUCKDB_JAR="$LIB_DIR/duckdb.jar"

if [[ ! -f "$R5_JAR" ]]; then
  echo "--- Downloading R5 v7.5 fat JAR ---"
  fetch https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar "$R5_JAR"
fi

if [[ ! -f "$DUCKDB_JAR" ]]; then
  echo "--- Downloading DuckDB JDBC ---"
  fetch https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.4.4.0/duckdb_jdbc-1.4.4.0.jar "$DUCKDB_JAR"
fi

# --- Step 3: Compile Java source ---
OUT_DIR="$R5_DIR/out"
SRC_DIR="$R5_DIR/src/main/java/propertymap"

# Recompile everything when any source file has no matching .class file or is
# newer than it.
NEEDS_COMPILE=false
for src in "$SRC_DIR"/*.java; do
  class="$OUT_DIR/propertymap/$(basename "${src%.java}").class"
  if [[ ! -f "$class" || "$src" -nt "$class" ]]; then
    NEEDS_COMPILE=true
    break
  fi
done

if [[ "$NEEDS_COMPILE" == true ]]; then
  echo "--- Compiling Java source ---"
  rm -rf -- "${OUT_DIR:?}"
  mkdir -p "$OUT_DIR"
  javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
fi

# --- Step 4: Prepare network build directory ---
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
mkdir -p "$NETWORK_DIR"

OSM_PBF="property-data/england-latest.osm.pbf"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$NETWORK_DIR/build"

# Input data is only staged when no cached network.dat exists yet.
if [[ ! -f "$NETWORK_DIR/network.dat" ]]; then
  echo "--- No cached network — copying transit data to build dir ---"
  mkdir -p "$NETWORK_DATA_DIR"

  if [[ ! -f "$OSM_PBF" ]]; then
    echo "Error: OSM PBF not found at $OSM_PBF" >&2
    echo "Download it from https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf" >&2
    exit 1
  fi
  cp -- "$OSM_PBF" "$NETWORK_DATA_DIR/"

  # Expand the glob first so that "no feeds present" (a warning) can be told
  # apart from a genuine copy failure (fatal under set -e).
  shopt -s nullglob
  GTFS_ZIPS=("$TRANSIT_SRC"/*.zip)
  shopt -u nullglob
  if [[ ${#GTFS_ZIPS[@]} -gt 0 ]]; then
    cp -- "${GTFS_ZIPS[@]}" "$NETWORK_DATA_DIR/"
  else
    echo "Warning: no GTFS .zip files found in $TRANSIT_SRC/ — transit routing will be unavailable" >&2
  fi
fi

# --- Step 5: Run batch ---
echo ""
echo "--- Starting batch computation ---"

# Collect the optional flags in an array so nothing is passed when unset.
APP_FLAGS=()
if [[ -n "$PATHS_FLAG" ]]; then
  APP_FLAGS+=("$PATHS_FLAG")
fi
if [[ -n "$DEMO_FLAG" ]]; then
  APP_FLAGS+=("$DEMO_FLAG")
fi

# DATA_DIR and NETWORK_CACHE_DIR are handed to the Java app via its environment.
# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
# Bash versions older than 4.4.
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
  java -Xms"$HEAP" -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
  --postcodes property-data/arcgis_data.parquet \
  --places property-data/places.parquet \
  --output-dir "$OUTPUT_BASE" \
  --threads "$THREADS" \
  ${APP_FLAGS[@]+"${APP_FLAGS[@]}"}

echo ""
echo "=== Complete ==="
# NOTE(review): this summary names files {place-name}.parquet and puts the
# reference files at the output root, while the header comment says
# {index}.parquet and {mode}/ — confirm against the Java app and align.
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/{place-name}.parquet"
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"