This commit is contained in:
Andras Schmelczer 2026-02-15 09:48:30 +00:00
parent 128b3191e7
commit 03445188ea
54 changed files with 596953 additions and 3577 deletions

129
r5-java/run.sh Executable file
View file

@ -0,0 +1,129 @@
#!/bin/bash
# Strict mode: exit when a command fails (-e), when an unset variable is
# expanded (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Batch-compute travel times from all places to all England postcodes
# for all transport modes (car, bicycle, walking, transit).
#
# Uses each place as origin with all postcodes as destinations — R5 does one
# routing computation per place, then reads off travel times to all postcodes.
# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
#
# Output: property-data/travel-times/{mode}/
# - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
# - postcodes_ref.parquet: postcode order reference
# - places_ref.parquet: place order reference
#
# Usage:
# ./r5-java/run.sh # defaults: 28 threads, 40g heap
# ./r5-java/run.sh --threads 8
# ./r5-java/run.sh --heap 24g
# --- Defaults ---
THREADS=28
HEAP=40g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java

# --- Parse args ---

# Print a short summary of the supported options to stdout.
usage() {
  cat <<'EOF'
Usage: ./r5-java/run.sh [--threads N] [--heap SIZE] [--network-dir DIR]
  --threads N        value passed to the batch job as --threads
  --heap SIZE        JVM maximum heap, passed as -Xmx (e.g. 24g)
  --network-dir DIR  directory holding the cached network (network.dat)
  -h, --help         show this help and exit
EOF
}

# Print an error message ($1) plus the usage summary on stderr, then exit 1.
die_usage() {
  echo "$1" >&2
  usage >&2
  exit 1
}

# Parse the given command-line arguments into THREADS, HEAP and NETWORK_DIR.
# Exits 1 on an unknown option or an option that is missing its value,
# and exits 0 after printing usage for -h/--help.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --threads | --heap | --network-dir)
        # Without this check a trailing option expands an unset "$2", which
        # under `set -u` aborts with only a bare "unbound variable" message.
        [[ $# -ge 2 ]] || die_usage "Missing value for $1"
        case "$1" in
          --threads) THREADS="$2" ;;
          --heap) HEAP="$2" ;;
          --network-dir) NETWORK_DIR="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        die_usage "Unknown: $1"
        ;;
    esac
  done
}

parse_args "$@"
# --- Verify we're in project root ---
# Both input datasets are addressed relative to the current directory, so
# their presence is used as the marker for "we are in the project root".
for required_input in property-data/places.parquet property-data/arcgis_data.parquet; do
  if [[ ! -f "$required_input" ]]; then
    echo "Error: run from the property-map project root"
    exit 1
  fi
done

# Banner showing the effective settings for this run.
printf '%s\n' \
  "=== R5 Batch Travel Times ===" \
  "Threads: $THREADS | Heap: $HEAP" \
  ""
# --- Step 1: Download JDK if needed ---
# The JDK is unpacked into a staging directory and only moved into place once
# extraction has succeeded. Checking for bin/java (rather than for the bare
# directory) means a half-finished earlier download is retried instead of
# being mistaken for a working install.
JDK_DIR="$R5_DIR/jdk"
if [ ! -x "$JDK_DIR/bin/java" ]; then
  echo "--- Downloading JDK 21 ---"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64 | amd64) JDK_ARCH="x64" ;;
    aarch64 | arm64) JDK_ARCH="aarch64" ;;
    *)
      echo "Unsupported architecture: $ARCH" >&2
      exit 1
      ;;
  esac
  # NOTE(review): the URL always requests a Linux build, whatever the host OS.
  JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"
  JDK_STAGING="$JDK_DIR.partial"
  rm -rf -- "${JDK_STAGING:?}"
  mkdir -p "$JDK_STAGING"
  if ! curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_STAGING"; then
    rm -rf -- "${JDK_STAGING:?}"
    echo "Error: JDK download or extraction failed" >&2
    exit 1
  fi
  # Replace any leftover incomplete install with the verified one.
  rm -rf -- "${JDK_DIR:?}"
  mv -- "$JDK_STAGING" "$JDK_DIR"
fi
# Absolute path, so java/javac still resolve if the working directory changes.
JAVA_HOME="$(cd "$JDK_DIR" && pwd)"
export JAVA_HOME
export PATH="$JAVA_HOME/bin:$PATH"
# --- Step 2: Download library JARs ---
LIB_DIR="$R5_DIR/lib"
mkdir -p "$LIB_DIR"
R5_JAR="$LIB_DIR/r5.jar"
DUCKDB_JAR="$LIB_DIR/duckdb.jar"

# Download a JAR unless a non-empty copy is already present.
# Arguments: $1 - destination path
#            $2 - label used in the progress message
#            $3 - URL to download
# The transfer goes to "<dest>.partial" and is renamed only on success, so an
# interrupted download never leaves a truncated JAR at the final path.
# Exits 1 if the download fails.
fetch_jar() {
  local dest=$1 label=$2 url=$3
  local staging="$dest.partial"
  if [ -s "$dest" ]; then
    return 0
  fi
  echo "--- Downloading $label ---"
  if ! curl -fL -o "$staging" "$url"; then
    rm -f -- "$staging"
    echo "Error: failed to download $url" >&2
    exit 1
  fi
  mv -- "$staging" "$dest"
}

fetch_jar "$R5_JAR" "R5 v7.5 fat JAR" \
  https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar
fetch_jar "$DUCKDB_JAR" "DuckDB JDBC" \
  https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.0.0/duckdb_jdbc-1.0.0.jar
# --- Step 3: Compile Java source ---
# Recompile when any source file has no matching .class file, or is newer
# than its .class file. The scan stops at the first stale source found.
OUT_DIR="$R5_DIR/out"
SRC_DIR="$R5_DIR/src/main/java/propertymap"
NEEDS_COMPILE=false
for java_file in "$SRC_DIR"/*.java; do
  stem="${java_file##*/}"
  stem="${stem%.java}"
  class_file="$OUT_DIR/propertymap/$stem.class"
  if [ -f "$class_file" ] && [ ! "$java_file" -nt "$class_file" ]; then
    continue
  fi
  NEEDS_COMPILE=true
  break
done

if [ "$NEEDS_COMPILE" = true ]; then
  echo "--- Compiling Java source ---"
  mkdir -p "$OUT_DIR"
  javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
fi
# --- Step 4: Prepare network build directory ---
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.

# Copy each existing path into a directory.
# Arguments: $1   - label used in messages
#            $2   - destination directory
#            $3.. - candidate paths (typically an expanded glob)
# Bash leaves an unmatched glob as the literal pattern, so paths that do not
# exist are skipped. Finding nothing to copy only prints a warning on stderr;
# a copy that actually fails aborts the script with status 1 instead of being
# silently ignored.
stage_inputs() {
  local label=$1 dest=$2
  shift 2
  local path staged=0
  for path in "$@"; do
    [ -e "$path" ] || continue
    if ! cp -- "$path" "$dest/"; then
      echo "Error: failed to copy $path to $dest" >&2
      exit 1
    fi
    staged=$((staged + 1))
  done
  if [ "$staged" -eq 0 ]; then
    echo "Warning: no $label files found to copy" >&2
  fi
}

mkdir -p "$NETWORK_DIR"
DATA_DIR="property-data/transit"
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
  BUILD_DIR="$NETWORK_DIR/build"
  echo "--- No cached network — copying transit data to build dir ---"
  mkdir -p "$BUILD_DIR"
  stage_inputs "property-data/transit/raw/*.osm.pbf" "$BUILD_DIR" property-data/transit/raw/*.osm.pbf
  stage_inputs "property-data/transit/*.zip" "$BUILD_DIR" property-data/transit/*.zip
  DATA_DIR="$BUILD_DIR"
fi
# --- Step 5: Run batch ---
printf '\n'
printf '%s\n' "--- Starting batch computation ---"

# Arguments handed to propertymap.App.
app_args=(
  --postcodes property-data/arcgis_data.parquet
  --places property-data/places.parquet
  --output-dir "$OUTPUT_BASE"
  --threads "$THREADS"
)

# DATA_DIR and NETWORK_CACHE_DIR are set in the environment of the java
# process only.
DATA_DIR="$DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
  java "-Xmx$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App "${app_args[@]}"

printf '\n'
printf '%s\n' \
  "=== Complete ===" \
  "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/" \
  "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"