144 lines
4.7 KiB
Bash
Executable file
144 lines
4.7 KiB
Bash
Executable file
#!/bin/bash
# Strict mode: abort on any failing command (errexit), on expansion of an
# unset variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail
|
|
|
|
# Batch-compute travel times from all places to all England postcodes
|
|
# for all transport modes (car, bicycle, walking, transit).
|
|
#
|
|
# Uses full England OSM + 2 GTFS feeds (BODS buses, National Rail).
|
|
# R5's TransportNetwork.fromDirectory() picks up all .osm.pbf and .zip files.
|
|
#
|
|
# Uses each place as origin with all postcodes as destinations — R5 does one
|
|
# routing computation per place, then reads off travel times to all postcodes.
|
|
# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
|
|
#
|
|
# Output: property-data/travel-times/{mode}/
|
|
# - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
|
|
# - postcodes_ref.parquet: postcode order reference
|
|
# - places_ref.parquet: place order reference
|
|
#
|
|
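# Usage:
#   ./r5-java/run.sh [--threads N] [--heap SIZE] [--network-dir DIR]
#                    [--output-dir DIR] [--paths] [--demo]
#   --threads N        thread count passed to the Java app (default: 16)
#   --heap SIZE        JVM maximum heap, passed as -Xmx (default: 16g)
#   --network-dir DIR  network cache/build directory (default: property-data/r5-network)
#   --output-dir DIR   base directory for results (default: property-data/travel-times)
#   --paths            records journey instructions (transit only, ~20x slower)
#   --demo             only compute Bank + TCR, transit only (quick test)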
|
|
|
|
# --- Defaults ---
# Each value below can be overridden by the matching command-line option.
THREADS=16                              # --threads: passed through to the Java app
HEAP=16g                                # --heap: JVM maximum heap (-Xmx)
NETWORK_DIR=property-data/r5-network    # --network-dir: network cache/build dir
OUTPUT_BASE=property-data/travel-times  # --output-dir: base directory for results
R5_DIR=r5-java                          # holds the JDK, JARs, sources and classes
PATHS_FLAG=""                           # becomes "--paths" when requested
DEMO_FLAG=""                            # becomes "--demo" when requested

# --- Parse args ---

#######################################
# Parse command-line options into the globals declared above.
# Globals:   THREADS, HEAP, NETWORK_DIR, OUTPUT_BASE, PATHS_FLAG, DEMO_FLAG
#            (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr for an unknown option or for an
#            option that is missing its value
# Returns:   0 on success; exits the shell with status 1 on a usage error
#######################################
parse_args() {
  while (($# > 0)); do
    case "$1" in
      --threads | --heap | --network-dir | --output-dir)
        # Check explicitly rather than letting `set -u` abort on "$2" with
        # an opaque "unbound variable" message.
        if (($# < 2)); then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --threads) THREADS=$2 ;;
          --heap) HEAP=$2 ;;
          --network-dir) NETWORK_DIR=$2 ;;
          --output-dir) OUTPUT_BASE=$2 ;;
        esac
        shift 2
        ;;
      --paths)
        PATHS_FLAG="--paths"
        shift
        ;;
      --demo)
        DEMO_FLAG="--demo"
        shift
        ;;
      *)
        echo "Unknown: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# --- Verify we're in project root ---
# The default paths in this script are relative, so the two input datasets
# double as a check that the working directory is the project root.
if [[ ! -f property-data/places.parquet || ! -f property-data/arcgis_data.parquet ]]; then
  echo "Error: run from the property-map project root" >&2
  exit 1
fi

echo "=== R5 Batch Travel Times ==="
echo "Threads: $THREADS | Heap: $HEAP"
echo ""
|
|
|
|
# --- Step 1: Download JDK if needed ---
# The JDK is unpacked into a staging directory and renamed into place only
# after the download and extraction both succeed. The existence of $JDK_DIR
# is what marks the JDK as installed, so it must never be left half-populated
# by a failed or interrupted download.
JDK_DIR="$R5_DIR/jdk"
if [[ ! -d "$JDK_DIR" ]]; then
  echo "--- Downloading JDK 21 ---"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64 | amd64) JDK_ARCH="x64" ;;
    aarch64 | arm64) JDK_ARCH="aarch64" ;;
    *)
      echo "Unsupported architecture: $ARCH" >&2
      exit 1
      ;;
  esac
  # NOTE(review): the URL always requests a Linux build, whatever the host OS.
  JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"

  JDK_STAGING="$JDK_DIR.partial"
  rm -rf -- "${JDK_STAGING:?}" # discard leftovers from an earlier failed attempt
  mkdir -p "$JDK_STAGING"
  # --strip-components=1 drops the archive's top-level directory so that
  # bin/, lib/, ... land directly in the staging directory.
  if ! curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_STAGING"; then
    rm -rf -- "${JDK_STAGING:?}"
    echo "Error: failed to download or unpack the JDK from $JDK_URL" >&2
    exit 1
  fi
  mv -- "$JDK_STAGING" "$JDK_DIR"
fi
export JAVA_HOME="$JDK_DIR"
export PATH="$JAVA_HOME/bin:$PATH"
|
|
|
|
# --- Step 2: Download library JARs ---
LIB_DIR="$R5_DIR/lib"
mkdir -p "$LIB_DIR"

R5_JAR="$LIB_DIR/r5.jar"
DUCKDB_JAR="$LIB_DIR/duckdb.jar"

#######################################
# Download a file unless it is already present.
# The download goes to "<dest>.partial" and is renamed only on success, so an
# interrupted transfer can never leave a truncated file at <dest> that later
# runs would mistake for a complete download.
# Arguments: $1 - label used in the progress message
#            $2 - source URL
#            $3 - destination path
# Outputs:   progress message on stdout; error message on stderr
# Returns:   0 if the file exists or was downloaded, 1 if the download failed
#######################################
download_if_missing() {
  local label=$1
  local url=$2
  local dest=$3
  local partial="$dest.partial"

  if [[ -f "$dest" ]]; then
    return 0
  fi

  echo "--- Downloading $label ---"
  if ! curl -fL -o "$partial" "$url"; then
    rm -f -- "$partial"
    echo "Error: failed to download $url" >&2
    return 1
  fi
  mv -- "$partial" "$dest"
}

download_if_missing "R5 v7.5 fat JAR" \
  https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar \
  "$R5_JAR"

download_if_missing "DuckDB JDBC" \
  https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.4.4.0/duckdb_jdbc-1.4.4.0.jar \
  "$DUCKDB_JAR"
|
|
|
|
# --- Step 3: Compile Java source ---
OUT_DIR="$R5_DIR/out"
SRC_DIR="$R5_DIR/src/main/java/propertymap"

# A rebuild is needed as soon as one source file either has no matching
# .class file or is newer than it; the first such file ends the scan.
NEEDS_COMPILE=false
for java_file in "$SRC_DIR"/*.java; do
  stem=${java_file##*/}
  class_file="$OUT_DIR/propertymap/${stem%.java}.class"
  if [[ -f "$class_file" && ! "$java_file" -nt "$class_file" ]]; then
    continue # up to date, keep scanning
  fi
  NEEDS_COMPILE=true
  break
done

if [[ "$NEEDS_COMPILE" == true ]]; then
  echo "--- Compiling Java source ---"
  # Rebuild from scratch so the output directory holds only fresh classes.
  rm -rf "$OUT_DIR"
  mkdir -p "$OUT_DIR"
  # The quoted "$LIB_DIR/*" is passed to javac unexpanded (a classpath
  # wildcard); the unquoted *.java is expanded by the shell.
  javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
fi
|
|
|
|
# --- Step 4: Prepare network build directory ---
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
mkdir -p "$NETWORK_DIR"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$TRANSIT_SRC"

if [[ ! -f "$NETWORK_DIR/network.dat" ]]; then
  BUILD_DIR="$NETWORK_DIR/build"
  echo "--- No cached network — copying transit data to build dir ---"
  mkdir -p "$BUILD_DIR"

  # Check for matches before copying, so that "no input files" stays a
  # warning while a genuine cp failure (disk full, permissions, ...) is
  # reported by cp itself and aborts the script instead of being mislabelled.
  # "${array[0]-}" covers both an unmatched literal pattern and, if nullglob
  # happens to be enabled, an empty array under `set -u`.
  osm_inputs=("$TRANSIT_SRC"/raw/*.osm.pbf)
  if [[ -e "${osm_inputs[0]-}" ]]; then
    cp -- "${osm_inputs[@]}" "$BUILD_DIR/"
  else
    echo "Warning: no .osm.pbf files found in $TRANSIT_SRC/raw/" >&2
  fi

  gtfs_inputs=("$TRANSIT_SRC"/*.zip)
  if [[ -e "${gtfs_inputs[0]-}" ]]; then
    cp -- "${gtfs_inputs[@]}" "$BUILD_DIR/"
  else
    echo "Warning: no .zip files found in $TRANSIT_SRC/" >&2
  fi

  NETWORK_DATA_DIR="$BUILD_DIR"
fi
|
|
|
|
# --- Step 5: Run batch ---
echo ""
echo "--- Starting batch computation ---"

# Collect the optional flags in an array so that a flag which was not
# requested contributes no argument at all, without relying on unquoted
# word splitting.
extra_args=()
if [[ -n "$PATHS_FLAG" ]]; then
  extra_args+=("$PATHS_FLAG")
fi
if [[ -n "$DEMO_FLAG" ]]; then
  extra_args+=("$DEMO_FLAG")
fi

# DATA_DIR and NETWORK_CACHE_DIR are passed to the Java process through its
# environment. The quoted "$LIB_DIR/*" reaches java unexpanded as a classpath
# wildcard. The ${arr[@]+...} form keeps an empty array from tripping
# `set -u` on bash releases older than 4.4.
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
  java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
  --postcodes property-data/arcgis_data.parquet \
  --places property-data/places.parquet \
  --output-dir "$OUTPUT_BASE" \
  --threads "$THREADS" \
  ${extra_args[@]+"${extra_args[@]}"}

echo ""
echo "=== Complete ==="
# NOTE(review): the header comment at the top of this file describes the
# per-place files as {index}.parquet, while the message below says
# {place-name}.parquet. Confirm which one the Java app actually writes.
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/{place-name}.parquet"
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"
|