perfect-postcode/r5-java/run.sh

148 lines
4.9 KiB
Bash
Executable file

#!/bin/bash
set -euo pipefail
# Batch-compute travel times from all places to all England postcodes
# for all transport modes (car, bicycle, walking, transit).
#
# Uses full England OSM + 2 GTFS feeds (BODS buses, National Rail).
# R5's TransportNetwork.fromDirectory() picks up all .osm.pbf and .zip files.
#
# Uses each place as origin with all postcodes as destinations — R5 does one
# routing computation per place, then reads off travel times to all postcodes.
# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
#
# Output: property-data/travel-times/{mode}/
# - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
# - postcodes_ref.parquet: postcode order reference
# - places_ref.parquet: place order reference
#
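# Usage (run from the project root):
# ./r5-java/run.sh [--threads N] [--heap SIZE] [--network-dir DIR] [--output-dir DIR] [--paths] [--demo]
# --threads N        thread count passed to the batch job (default: 8)
# --heap SIZE        JVM heap size, used for both -Xms and -Xmx (default: 40g)
# --network-dir DIR  network cache/build directory (default: property-data/r5-network)
# --output-dir DIR   base directory for results (default: property-data/travel-times)
# --paths records journey instructions (transit only, ~20x slower)
# --demo only compute Bank + TCR, transit only (quick test)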
# --- Defaults ---
# Baseline settings. Every value except R5_DIR can be replaced by the
# command-line option named in its trailing comment.
PATHS_FLAG=""                             # set to "--paths" by --paths
DEMO_FLAG=""                              # set to "--demo" by --demo
R5_DIR="r5-java"                          # root for the JDK, JARs, sources and classes
OUTPUT_BASE="property-data/travel-times"  # --output-dir
NETWORK_DIR="property-data/r5-network"    # --network-dir
HEAP="40g"                                # --heap (used for both -Xms and -Xmx)
THREADS="8"                               # --threads
# --- Parse args ---
# parse_args ARG...
#   Applies command-line overrides to the globals THREADS, HEAP, NETWORK_DIR,
#   OUTPUT_BASE, PATHS_FLAG and DEMO_FLAG.
#   Prints a diagnostic to stderr and exits 1 on an unknown option, or when an
#   option that takes a value is given without one.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --threads | --heap | --network-dir | --output-dir)
        # Without this check a trailing value-taking option reads an unset $2,
        # which under `set -u` aborts with a bare "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for $1" >&2
          exit 1
        fi
        case "$1" in
          --threads) THREADS="$2" ;;
          --heap) HEAP="$2" ;;
          --network-dir) NETWORK_DIR="$2" ;;
          --output-dir) OUTPUT_BASE="$2" ;;
        esac
        shift 2
        ;;
      --paths)
        PATHS_FLAG="--paths"
        shift
        ;;
      --demo)
        DEMO_FLAG="--demo"
        shift
        ;;
      *)
        echo "Unknown: $1" >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# --- Verify we're in project root ---
# Both input datasets are addressed relative to the working directory, so
# their presence doubles as the "started from the project root" check.
for required_input in property-data/places.parquet property-data/arcgis_data.parquet; do
  if [[ ! -f "$required_input" ]]; then
    echo "Error: run from the property-map project root"
    exit 1
  fi
done

# Banner with the effective settings for this run, followed by a blank line.
printf '%s\n' "=== R5 Batch Travel Times ===" "Threads: $THREADS | Heap: $HEAP" ""
# --- Step 1: Download JDK if needed ---
# A private JDK 21 is kept in $R5_DIR/jdk. The install counts as present only
# when bin/java is executable: testing for the directory alone would accept the
# empty or half-extracted directory left behind by an interrupted download and
# never retry it.
JDK_DIR="$R5_DIR/jdk"
if [ ! -x "$JDK_DIR/bin/java" ]; then
  echo "--- Downloading JDK 21 ---"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64 | amd64) JDK_ARCH="x64" ;;
    aarch64 | arm64) JDK_ARCH="aarch64" ;;
    *)
      echo "Unsupported architecture: $ARCH" >&2
      exit 1
      ;;
  esac
  # NOTE: the URL always requests a Linux build, whatever the host OS is.
  JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"

  # Extract into a staging directory and swap it into place only on success,
  # so $JDK_DIR never holds a partial install. Detecting a failed curl here
  # relies on the `pipefail` option set at the top of the script.
  JDK_STAGING="$JDK_DIR.partial"
  rm -rf "${JDK_STAGING:?}"
  mkdir -p "$JDK_STAGING"
  if ! curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_STAGING"; then
    rm -rf "${JDK_STAGING:?}"
    echo "Error: JDK download or extraction failed ($JDK_URL)" >&2
    exit 1
  fi
  if [ ! -x "$JDK_STAGING/bin/java" ]; then
    rm -rf "${JDK_STAGING:?}"
    echo "Error: downloaded JDK archive does not contain bin/java" >&2
    exit 1
  fi
  rm -rf "${JDK_DIR:?}"
  mv "$JDK_STAGING" "$JDK_DIR"
fi
# Put the private JDK first on PATH so the javac/java calls below use it.
export JAVA_HOME="$JDK_DIR"
export PATH="$JAVA_HOME/bin:$PATH"
# --- Step 2: Download library JARs ---
# The R5 fat JAR and the DuckDB JDBC driver are fetched once into $R5_DIR/lib
# and reused on later runs.
LIB_DIR="$R5_DIR/lib"
mkdir -p "$LIB_DIR"
R5_JAR="$LIB_DIR/r5.jar"
DUCKDB_JAR="$LIB_DIR/duckdb.jar"

# download_jar DEST URL
#   Fetches URL into DEST through a sibling "DEST.part" file that is renamed
#   into place only after curl succeeds. Writing straight to DEST would let an
#   interrupted transfer leave a truncated JAR that the existence checks below
#   accept on the next run. Exits 1 with a message on stderr on failure.
download_jar() {
  local dest=$1 url=$2
  local partial="$dest.part"
  if ! curl -fL -o "$partial" "$url"; then
    rm -f "$partial"
    echo "Error: failed to download $url" >&2
    exit 1
  fi
  mv "$partial" "$dest"
}

if [ ! -f "$R5_JAR" ]; then
  echo "--- Downloading R5 v7.5 fat JAR ---"
  download_jar "$R5_JAR" https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar
fi
if [ ! -f "$DUCKDB_JAR" ]; then
  echo "--- Downloading DuckDB JDBC ---"
  download_jar "$DUCKDB_JAR" https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.4.4.0/duckdb_jdbc-1.4.4.0.jar
fi
# --- Step 3: Compile Java source ---
# A rebuild is triggered when any source file has no matching .class file or
# is newer than it. A rebuild always wipes the output directory and recompiles
# every source file.
OUT_DIR="$R5_DIR/out"
SRC_DIR="$R5_DIR/src/main/java/propertymap"
NEEDS_COMPILE=false
for java_file in "$SRC_DIR"/*.java; do
  # Foo.java -> $OUT_DIR/propertymap/Foo.class
  stem=${java_file%.java}
  stem=${stem##*/}
  class_file="$OUT_DIR/propertymap/$stem.class"
  if [[ ! -f "$class_file" || "$java_file" -nt "$class_file" ]]; then
    NEEDS_COMPILE=true
    break
  fi
done
if [[ "$NEEDS_COMPILE" == true ]]; then
  echo "--- Compiling Java source ---"
  rm -rf "$OUT_DIR"
  mkdir -p "$OUT_DIR"
  # The classpath wildcard is quoted so it reaches javac unexpanded by the shell.
  javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
fi
# --- Step 4: Prepare network build directory ---
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
# The copy is skipped when a cached network ($NETWORK_DIR/network.dat) exists.
mkdir -p "$NETWORK_DIR"
OSM_PBF="property-data/england-latest.osm.pbf"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$NETWORK_DIR/build"
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
  BUILD_DIR="$NETWORK_DATA_DIR"
  echo "--- No cached network — copying transit data to build dir ---"
  mkdir -p "$BUILD_DIR"
  if [ ! -f "$OSM_PBF" ]; then
    echo "Error: OSM PBF not found at $OSM_PBF" >&2
    echo "Download it from https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf" >&2
    exit 1
  fi

  # Collect the GTFS feeds explicitly so that "no feeds present" can be told
  # apart from a failing copy. An unmatched glob stays literal, hence the -f
  # test on each candidate.
  GTFS_FEEDS=()
  for gtfs_feed in "$TRANSIT_SRC"/*.zip; do
    if [ -f "$gtfs_feed" ]; then
      GTFS_FEEDS+=("$gtfs_feed")
    fi
  done
  if [ "${#GTFS_FEEDS[@]}" -eq 0 ]; then
    # Fatal, so it is reported as an error rather than a warning.
    echo "Error: no GTFS .zip files found in $TRANSIT_SRC/ — transit routing would be unavailable" >&2
    exit 1
  fi

  # cp's own diagnostics are left visible; a failure here (disk full,
  # permissions) stops the script through `set -e`.
  cp "$OSM_PBF" "$BUILD_DIR/"
  cp "${GTFS_FEEDS[@]}" "$BUILD_DIR/"
fi
# --- Step 5: Run batch ---
printf '%s\n' "" "--- Starting batch computation ---"

# The JVM command line is built as an array; the heap size is used for both
# the initial (-Xms) and maximum (-Xmx) heap.
BATCH_CMD=(
  java "-Xms$HEAP" "-Xmx$HEAP"
  -cp "$OUT_DIR:$LIB_DIR/*"
  propertymap.App
  --postcodes property-data/arcgis_data.parquet
  --places property-data/places.parquet
  --output-dir "$OUTPUT_BASE"
  --threads "$THREADS"
)
# The optional flags are appended only when set, so an empty flag never
# reaches the program as an empty argument.
if [[ -n "$PATHS_FLAG" ]]; then
  BATCH_CMD+=("$PATHS_FLAG")
fi
if [[ -n "$DEMO_FLAG" ]]; then
  BATCH_CMD+=("$DEMO_FLAG")
fi

# DATA_DIR and NETWORK_CACHE_DIR are exported to this one command only.
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" "${BATCH_CMD[@]}"

printf '%s\n' \
  "" \
  "=== Complete ===" \
  "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/{place-name}.parquet" \
  "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"