This commit is contained in:
Andras Schmelczer 2026-02-15 09:48:30 +00:00
parent 128b3191e7
commit 03445188ea
54 changed files with 596953 additions and 3577 deletions

3
r5-java/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
jdk/
lib/
out/

View file

@ -1,20 +0,0 @@
# ---- Build stage: fetch dependency JARs and compile the service with a full JDK ----
FROM eclipse-temurin:21-jdk AS build
WORKDIR /app
# Download pre-built R5 fat JAR from GitHub Releases (includes all R5 deps)
ADD https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar /app/lib/r5.jar
# Gson for JSON (HTTP server is built into JDK)
ADD https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar /app/lib/gson.jar
COPY src/ src/
# Compile against every JAR in lib/; class files are written to /app/out.
RUN javac -cp "lib/*" -d out src/main/java/propertymap/App.java
# ---- Runtime stage: JRE-only image carrying the JARs and compiled classes ----
FROM eclipse-temurin:21-jre
WORKDIR /app
# Install curl and drop the apt package lists in the same layer to keep the image small.
# NOTE(review): nothing in this Dockerfile invokes curl — presumably used by an external
# health check; confirm before removing.
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
COPY --from=build /app/lib/ /app/lib/
COPY --from=build /app/out/ /app/out/
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
# Exec-form entrypoint: the script is run directly, not through a shell wrapper.
ENTRYPOINT ["/app/entrypoint.sh"]

View file

@ -1,18 +0,0 @@
#!/bin/bash
# Container entrypoint: on first run, stage the OSM/transit inputs into a writable
# build directory, then replace this shell with the Java service.
#
# Required environment:
#   DATA_DIR           directory holding the transit *.zip files
#   NETWORK_CACHE_DIR  writable directory where network.dat is cached
# Optional environment:
#   OSM_DIR            directory holding *.osm.pbf extracts
set -euo pipefail

# Fail fast with a clear message instead of silently expanding an unset variable
# to "" (which would stage data into "/build" and only fail later inside Java).
: "${DATA_DIR:?DATA_DIR must be set}"
: "${NETWORK_CACHE_DIR:?NETWORK_CACHE_DIR must be set}"

TRANSIT_DIR=$DATA_DIR
NETWORK_DIR=$NETWORK_CACHE_DIR
BUILD_DIR="$NETWORK_DIR/build"

# If no cached network yet, copy transit data to a writable location for the build.
# R5 writes temp files (.mapdb) next to the OSM/GTFS files during network construction.
if [[ ! -f "$NETWORK_DIR/network.dat" ]]; then
  echo "No cached network — copying transit data to writable build dir..."
  mkdir -p "$BUILD_DIR"
  # Best-effort copies: either input set may legitimately be absent, so a glob
  # that matches nothing must not abort the script.
  cp "${OSM_DIR:-}"/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
  cp "$TRANSIT_DIR"/*.zip "$BUILD_DIR/" 2>/dev/null || true
  if ! compgen -G "$BUILD_DIR/*.osm.pbf" >/dev/null; then
    echo "Warning: no .osm.pbf files were staged into $BUILD_DIR" >&2
  fi
  # The Java process reads DATA_DIR, so point it at the staged copy.
  export DATA_DIR="$BUILD_DIR"
fi

exec java -Xmx16g -cp "out:lib/*" propertymap.App

129
r5-java/run.sh Executable file
View file

@ -0,0 +1,129 @@
#!/bin/bash
set -euo pipefail
# Batch-compute travel times from all places to all England postcodes
# for all transport modes (car, bicycle, walking, transit).
#
# Uses each place as origin with all postcodes as destinations — R5 does one
# routing computation per place, then reads off travel times to all postcodes.
# For car/bicycle/walking this is treated as symmetric
# (place->postcode = postcode->place).
#
# Output (default output dir: property-data/travel-times):
#   - {output-dir}/{mode}/{index}.parquet: (pcds VARCHAR, travel_minutes SMALLINT),
#     one file per place
#   - {output-dir}/postcodes_ref.parquet: postcode order reference
#   - {output-dir}/places_ref.parquet: place order reference
#
# Usage (run from the project root):
#   ./r5-java/run.sh                      # defaults: 28 threads, 40g heap
#   ./r5-java/run.sh --threads 8
#   ./r5-java/run.sh --heap 24g
#   ./r5-java/run.sh --network-dir DIR    # where network.dat is cached
#   ./r5-java/run.sh --output-dir DIR     # where results are written

# --- Defaults ---
THREADS=28
HEAP=40g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java

# Abort with a readable message when a flag is the last word on the command line
# (otherwise `set -u` reports an opaque "$2: unbound variable").
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Error: $1 requires a value" >&2
    exit 1
  fi
}

# Download URL $1 to path $2 via a temporary ".part" file, so an interrupted
# transfer can never be mistaken for a complete file on the next run.
fetch() {
  local url=$1 dest=$2
  curl -fL -o "$dest.part" "$url"
  mv -- "$dest.part" "$dest"
}

# --- Parse args ---
while [[ $# -gt 0 ]]; do
  case "$1" in
    --threads)     require_value "$@"; THREADS="$2"; shift 2 ;;
    --heap)        require_value "$@"; HEAP="$2"; shift 2 ;;
    --network-dir) require_value "$@"; NETWORK_DIR="$2"; shift 2 ;;
    --output-dir)  require_value "$@"; OUTPUT_BASE="$2"; shift 2 ;;
    *) echo "Unknown: $1" >&2; exit 1 ;;
  esac
done

if [[ ! "$THREADS" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: --threads must be a positive integer (got '$THREADS')" >&2
  exit 1
fi

# --- Verify we're in project root ---
if [[ ! -f property-data/places.parquet || ! -f property-data/arcgis_data.parquet ]]; then
  echo "Error: run from the property-map project root"
  exit 1
fi

echo "=== R5 Batch Travel Times ==="
echo "Threads: $THREADS | Heap: $HEAP"
echo ""

# --- Step 1: Download JDK if needed ---
JDK_DIR="$R5_DIR/jdk"
# Test for the java binary rather than the directory: a directory alone does not
# prove that a previous download finished.
if [[ ! -x "$JDK_DIR/bin/java" ]]; then
  echo "--- Downloading JDK 21 ---"
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64|amd64)  JDK_ARCH="x64" ;;
    aarch64|arm64) JDK_ARCH="aarch64" ;;
    *) echo "Unsupported architecture: $ARCH" >&2; exit 1 ;;
  esac
  JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"
  # Unpack into a scratch directory and move it into place only on success.
  JDK_PARTIAL="$JDK_DIR.partial"
  rm -rf -- "${JDK_PARTIAL:?}"
  mkdir -p "$JDK_PARTIAL"
  curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_PARTIAL"
  rm -rf -- "${JDK_DIR:?}"
  mv -- "$JDK_PARTIAL" "$JDK_DIR"
fi
export JAVA_HOME="$JDK_DIR"
export PATH="$JAVA_HOME/bin:$PATH"

# --- Step 2: Download library JARs ---
LIB_DIR="$R5_DIR/lib"
mkdir -p "$LIB_DIR"
R5_JAR="$LIB_DIR/r5.jar"
DUCKDB_JAR="$LIB_DIR/duckdb.jar"
if [[ ! -f "$R5_JAR" ]]; then
  echo "--- Downloading R5 v7.5 fat JAR ---"
  fetch https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar "$R5_JAR"
fi
if [[ ! -f "$DUCKDB_JAR" ]]; then
  echo "--- Downloading DuckDB JDBC ---"
  fetch https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.0.0/duckdb_jdbc-1.0.0.jar "$DUCKDB_JAR"
fi

# --- Step 3: Compile Java source ---
# Recompile when any source file is newer than (or lacks) its top-level class file.
OUT_DIR="$R5_DIR/out"
SRC_DIR="$R5_DIR/src/main/java/propertymap"
NEEDS_COMPILE=false
for src in "$SRC_DIR"/*.java; do
  # An unmatched glob stays literal; report that instead of a confusing javac error.
  if [[ ! -e "$src" ]]; then
    echo "Error: no Java sources found in $SRC_DIR" >&2
    exit 1
  fi
  class="$OUT_DIR/propertymap/$(basename "${src%.java}").class"
  if [[ ! -f "$class" || "$src" -nt "$class" ]]; then
    NEEDS_COMPILE=true
    break
  fi
done
if [[ "$NEEDS_COMPILE" == true ]]; then
  echo "--- Compiling Java source ---"
  mkdir -p "$OUT_DIR"
  javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
fi

# --- Step 4: Prepare network build directory ---
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
mkdir -p "$NETWORK_DIR"
DATA_DIR="property-data/transit"
if [[ ! -f "$NETWORK_DIR/network.dat" ]]; then
  BUILD_DIR="$NETWORK_DIR/build"
  echo "--- No cached network — copying transit data to build dir ---"
  mkdir -p "$BUILD_DIR"
  # Best-effort copies: a glob that matches nothing must not abort the script.
  cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
  cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null || true
  if ! compgen -G "$BUILD_DIR/*.osm.pbf" >/dev/null; then
    echo "Warning: no .osm.pbf files were staged into $BUILD_DIR" >&2
  fi
  DATA_DIR="$BUILD_DIR"
fi

# --- Step 5: Run batch ---
echo ""
echo "--- Starting batch computation ---"
DATA_DIR="$DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
  java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
  --postcodes property-data/arcgis_data.parquet \
  --places property-data/places.parquet \
  --output-dir "$OUTPUT_BASE" \
  --threads "$THREADS"

echo ""
echo "=== Complete ==="
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/"
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"

View file

@ -1,223 +1,208 @@
package propertymap;
import com.conveyal.r5.OneOriginResult;
import com.conveyal.r5.analyst.FreeFormPointSet;
import com.conveyal.r5.analyst.PointSet;
import com.conveyal.r5.analyst.TravelTimeComputer;
import com.conveyal.r5.analyst.WebMercatorExtents;
import com.conveyal.r5.analyst.cluster.RegionalTask;
import com.conveyal.r5.analyst.cluster.TravelTimeResult;
import com.conveyal.r5.api.util.LegMode;
import com.conveyal.r5.api.util.TransitModes;
import com.conveyal.r5.kryo.KryoNetworkSerializer;
import com.conveyal.r5.transit.TransportNetwork;
import com.google.gson.Gson;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpServer;
import org.locationtech.jts.geom.Coordinate;
import org.duckdb.DuckDBConnection;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.util.EnumSet;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Batch-compute travel times from each origin (place) to all destinations (postcodes)
* for all transport modes (car, bicycle, walking, transit).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
*/
public class App {
private static TransportNetwork network;
private static final Gson gson = new Gson();
static class TravelTimeRequest {
double[] origin; // [lat, lon]
double[][] destinations; // [[lat, lon], ...]
String mode; // "transit", "car", "bicycle", "walking"
}
static class TravelTimeResponse {
double[] travel_times; // minutes, -1 = unreachable
}
private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
private static final int MAX_RETRIES = 2;
public static void main(String[] args) throws Exception {
String dataDir = System.getenv("DATA_DIR");
String postcodesPath = requiredArg(args, "--postcodes");
String placesPath = requiredArg(args, "--places");
String outputDirStr = requiredArg(args, "--output-dir");
int threads = Integer.parseInt(optionalArg(args, "--threads", "4"));
if (dataDir == null) {
System.err.println("Error: DATA_DIR environment variable not set");
System.exit(1);
}
Path outDir = Paths.get(outputDirStr);
Files.createDirectories(outDir);
String networkCacheDir = System.getenv("NETWORK_CACHE_DIR");
if (networkCacheDir == null) {
System.err.println("Error: NETWORK_CACHE_DIR environment variable not set");
System.exit(1);
}
LocalDate today = LocalDate.now();
TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR"));
System.out.println("Loading transport network from " + dataDir);
System.out.println("Network cache dir: " + networkCacheDir);
System.err.println("Loading postcodes (England only)...");
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
int nDest = postcodes.lats().length;
System.err.printf(" %,d postcodes%n", nDest);
File cacheFile = new File(networkCacheDir, "network.dat");
if (cacheFile.exists()) {
System.out.println("Loading cached network from " + cacheFile);
network = KryoNetworkSerializer.read(cacheFile);
} else {
System.out.println("Building network (first run, this takes a few minutes)...");
network = TransportNetwork.fromDirectory(new File(dataDir));
new File(networkCacheDir).mkdirs();
KryoNetworkSerializer.write(network, cacheFile);
System.out.println("Network cached to " + cacheFile);
}
List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());
// Build stop-to-vertex distance tables (needed for egress routing in transit mode).
// Not built by fromDirectory() and too large to fit in the Kryo cache with 4GB heap.
System.out.println("Building stop-to-vertex distance tables...");
network.transitLayer.buildDistanceTables(null);
System.out.println("Distance tables built");
System.err.println("Loading places (deduplicated)...");
double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
int nOrigins = originLats.length;
System.err.printf(" %,d places%n", nOrigins);
System.err.printf(" Estimated output: %.1f GB (%,d x %,d x 2B)%n",
(double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);
System.out.println("Transport network loaded successfully");
HttpServer server = HttpServer.create(new InetSocketAddress(8003), 0);
server.createContext("/health", exchange -> {
sendResponse(exchange, 200, "ok");
// One thread pool shared across all modes
ExecutorService pool = Executors.newFixedThreadPool(threads);
// One DuckDB connection per thread, reused across all writes
ThreadLocal<DuckDBConnection> threadConn = ThreadLocal.withInitial(() -> {
try { return Parquet.connect(); }
catch (Exception e) { throw new RuntimeException(e); }
});
server.createContext("/travel-times", exchange -> {
if (!"POST".equals(exchange.getRequestMethod())) {
sendResponse(exchange, 405, "Method not allowed");
return;
try {
for (String mode : MODES) {
processMode(network, chunks, postcodes.codes(), originLats, originLons,
nDest, outDir, mode, today, pool, threadConn);
}
} finally {
pool.shutdown();
pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
}
}
/**
 * Compute and write travel times for every origin of one transport mode that does
 * not yet have an output file, using the shared worker pool.
 *
 * <p>Origins whose parquet file already exists (see {@code findRemaining}) are skipped,
 * so an interrupted run can be resumed. Progress is printed to stderr every two seconds.
 * A failing origin is logged and counted but does not stop the remaining origins.
 *
 * @param network    transport network passed through to routing
 * @param chunks     destination chunks passed through to routing
 * @param postcodes  postcode labels written alongside the travel times
 * @param originLats origin latitudes; its length defines the number of origins
 * @param originLons origin longitudes, indexed like {@code originLats}
 * @param nDest      number of destinations
 * @param outDir     base output directory; results go to {@code outDir/mode}
 * @param mode       transport mode name, also used as the sub-directory name
 * @param date       date passed through to routing
 * @param pool       worker pool the per-origin tasks are submitted to (not shut down here)
 * @param threadConn per-thread DuckDB connection supplier
 * @throws Exception if the mode directory cannot be created or scanned, or if the
 *                   calling thread is interrupted while waiting for the workers
 */
private static void processMode(
        TransportNetwork network, List<Router.DestinationChunk> chunks,
        String[] postcodes, double[] originLats, double[] originLons, int nDest,
        Path outDir, String mode, LocalDate date,
        ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
    int nOrigins = originLats.length;
    System.err.printf("%n=== %s ===%n", mode.toUpperCase());
    Path modeDir = outDir.resolve(mode);
    Files.createDirectories(modeDir);
    List<Integer> remaining = findRemaining(modeDir, nOrigins);
    int alreadyDone = nOrigins - remaining.size();
    System.err.printf("  %,d done, %,d remaining%n", alreadyDone, remaining.size());
    if (remaining.isEmpty()) {
        System.err.println("  All origins completed for this mode!");
        return;
    }
    long startMs = System.currentTimeMillis();
    int total = remaining.size();
    AtomicInteger completed = new AtomicInteger(0);
    AtomicInteger failed = new AtomicInteger(0);
    // Progress reporter on a timer instead of per-task stderr writes
    ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> {
        Thread t = new Thread(r, "progress");
        t.setDaemon(true);
        return t;
    });
    try {
        reporter.scheduleAtFixedRate(() -> {
            int c = completed.get();
            if (c == 0) return; // no rate to extrapolate from yet
            double secs = (System.currentTimeMillis() - startMs) / 1000.0;
            double rate = c / secs;
            double etaH = (total - c) / rate / 3600;
            System.err.printf("\r  [%,d/%,d] %.1f/s | ETA %.1fh | fail %d",
                    c, total, rate, etaH, failed.get());
        }, 2, 2, TimeUnit.SECONDS);
        // One latch count per submitted origin; await() returns once all have finished.
        java.util.concurrent.CountDownLatch latch =
                new java.util.concurrent.CountDownLatch(remaining.size());
        for (int idx : remaining) {
            pool.submit(() -> {
                try {
                    processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
                            nDest, modeDir, mode, date, idx, threadConn.get());
                    completed.incrementAndGet();
                } catch (Throwable t) {
                    // Catch Throwable, not just Exception: the Future returned by submit()
                    // is discarded, so an Error (e.g. OutOfMemoryError) would otherwise
                    // vanish without being counted. Print the throwable itself so
                    // message-less exceptions still show their type.
                    failed.incrementAndGet();
                    System.err.printf("%n  [FAIL] origin %d: %s%n", idx, t);
                } finally {
                    latch.countDown();
                }
            });
        }
        latch.await();
    } finally {
        // Always stop the reporter, including when await() is interrupted; otherwise
        // it would keep printing progress for this mode.
        reporter.shutdown();
    }
    double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
    int n = completed.get();
    // Math.max(.., 1) avoids dividing by a near-zero elapsed time on very short runs.
    System.err.printf("\r  [%,d/%,d] %.1f/s | %.1fh | fail %d%n",
            n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get());
}
/** Compute and write travel times for a single origin, with retry on failure. */
private static void processOrigin(
TransportNetwork network, List<Router.DestinationChunk> chunks,
String[] postcodes, double lat, double lon, int nDest,
Path modeDir, String mode, LocalDate date, int idx,
DuckDBConnection conn) throws Exception {
Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
Exception lastError = null;
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
handleTravelTimes(exchange);
short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
Parquet.writeTravelTimes(conn, outPath, postcodes, times);
return;
} catch (Exception e) {
System.err.println("Error handling travel-times: " + e.getMessage());
e.printStackTrace();
sendResponse(exchange, 500, "Internal server error: " + e.getMessage());
}
});
server.setExecutor(java.util.concurrent.Executors.newFixedThreadPool(4));
server.start();
System.out.println("R5 service listening on port 8003");
}
private static void sendResponse(HttpExchange exchange, int status, String body) throws IOException {
byte[] bytes = body.getBytes(StandardCharsets.UTF_8);
exchange.getResponseHeaders().set("Content-Type", "application/json");
exchange.sendResponseHeaders(status, bytes.length);
try (OutputStream os = exchange.getResponseBody()) {
os.write(bytes);
}
}
private static void handleTravelTimes(HttpExchange exchange) throws IOException {
long t0 = System.currentTimeMillis();
String body = new String(exchange.getRequestBody().readAllBytes(), StandardCharsets.UTF_8);
TravelTimeRequest req = gson.fromJson(body, TravelTimeRequest.class);
if (req.origin == null || req.origin.length != 2) {
sendResponse(exchange, 400, "{\"error\":\"origin must be [lat, lon]\"}");
return;
}
if (req.destinations == null || req.destinations.length == 0) {
sendResponse(exchange, 400, "{\"error\":\"destinations must be non-empty\"}");
return;
}
String mode = req.mode != null ? req.mode : "transit";
// Build destination point set (Coordinate takes x=lon, y=lat)
Coordinate[] coords = new Coordinate[req.destinations.length];
for (int i = 0; i < req.destinations.length; i++) {
coords[i] = new Coordinate(req.destinations[i][1], req.destinations[i][0]); // lon, lat
}
FreeFormPointSet destinations = new FreeFormPointSet(coords);
// Build the regional task
RegionalTask task = new RegionalTask();
task.fromLat = req.origin[0];
task.fromLon = req.origin[1];
task.date = LocalDate.now();
task.percentiles = new int[]{50};
task.recordTimes = true;
task.destinationPointSets = new PointSet[]{ destinations };
// Set grid extents from destination point set (required by TravelTimeComputer)
WebMercatorExtents extents = destinations.getWebMercatorExtents();
task.zoom = extents.zoom;
task.west = extents.west;
task.north = extents.north;
task.width = extents.width;
task.height = extents.height;
switch (mode) {
case "car":
task.fromTime = 8 * 3600;
task.toTime = 8 * 3600 + 60;
task.maxTripDurationMinutes = 120;
task.accessModes = EnumSet.of(LegMode.CAR);
task.egressModes = EnumSet.of(LegMode.CAR);
task.directModes = EnumSet.of(LegMode.CAR);
task.transitModes = EnumSet.noneOf(TransitModes.class);
break;
case "bicycle":
task.fromTime = 8 * 3600;
task.toTime = 8 * 3600 + 60;
task.maxTripDurationMinutes = 120;
task.accessModes = EnumSet.of(LegMode.BICYCLE);
task.egressModes = EnumSet.of(LegMode.BICYCLE);
task.directModes = EnumSet.of(LegMode.BICYCLE);
task.transitModes = EnumSet.noneOf(TransitModes.class);
break;
case "walking":
task.fromTime = 8 * 3600;
task.toTime = 8 * 3600 + 60;
task.maxTripDurationMinutes = 120;
task.accessModes = EnumSet.of(LegMode.WALK);
task.egressModes = EnumSet.of(LegMode.WALK);
task.directModes = EnumSet.of(LegMode.WALK);
task.transitModes = EnumSet.noneOf(TransitModes.class);
break;
default: // transit
task.fromTime = 8 * 3600;
task.toTime = 8 * 3600 + 60; // single RAPTOR sweep
task.maxTripDurationMinutes = 120;
task.maxRides = 4;
task.accessModes = EnumSet.of(LegMode.WALK);
task.egressModes = EnumSet.of(LegMode.WALK);
task.directModes = EnumSet.of(LegMode.WALK);
task.transitModes = EnumSet.of(TransitModes.TRANSIT);
break;
}
// Compute travel times
TravelTimeComputer computer = new TravelTimeComputer(task, network);
OneOriginResult result = computer.computeTravelTimes();
TravelTimeResponse response = new TravelTimeResponse();
response.travel_times = new double[req.destinations.length];
TravelTimeResult tt = result.travelTimes;
if (tt != null) {
int[][] values = tt.getValues();
// values[percentileIndex][destinationIndex]
for (int i = 0; i < req.destinations.length; i++) {
if (i < values[0].length && values[0][i] != Integer.MAX_VALUE) {
response.travel_times[i] = values[0][i]; // already in minutes
} else {
response.travel_times[i] = -1; // unreachable
lastError = e;
if (attempt < MAX_RETRIES) {
System.err.printf("%n [RETRY %d/%d] origin %d: %s%n",
attempt + 1, MAX_RETRIES, idx, e.getMessage());
}
}
} else {
for (int i = 0; i < req.destinations.length; i++) {
response.travel_times[i] = -1;
}
throw lastError;
}
/**
 * List the origin indices in {@code [0, nOrigins)} that still need computing: those whose
 * {@code %06d.parquet} file under {@code modeDir} is absent or has zero length.
 */
private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
    List<Integer> pending = new ArrayList<>();
    int index = 0;
    while (index < nOrigins) {
        Path candidate = modeDir.resolve(String.format("%06d.parquet", index));
        // Size is only queried for files that exist.
        boolean finished = Files.exists(candidate) && Files.size(candidate) != 0;
        if (!finished) {
            pending.add(index);
        }
        index++;
    }
    return pending;
}
long elapsed = System.currentTimeMillis() - t0;
System.out.println("Travel times (" + mode + ") computed for " + req.destinations.length +
" destinations in " + elapsed + "ms");
/**
 * Return the value following the first occurrence of flag {@code name} in {@code args}.
 * A flag in the final position has no value and is not matched. When the flag is not
 * found, prints a usage message to stderr and exits the JVM with status 1.
 */
private static String requiredArg(String[] args, String name) {
    int lastFlagPos = args.length - 2; // a flag needs one more element after it
    for (int pos = 0; pos <= lastFlagPos; pos++) {
        String candidate = args[pos];
        if (candidate.equals(name)) {
            return args[pos + 1];
        }
    }
    System.err.println("Missing required argument: " + name);
    System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N]");
    System.exit(1);
    return null; // unreachable
}
sendResponse(exchange, 200, gson.toJson(response));
/**
 * Return the value following the first occurrence of flag {@code name} in {@code args},
 * or {@code defaultValue} when the flag is absent or is the final element.
 */
private static String optionalArg(String[] args, String name, String defaultValue) {
    int pos = 0;
    while (pos + 1 < args.length) {
        if (args[pos].equals(name)) {
            return args[pos + 1];
        }
        pos++;
    }
    return defaultValue;
}
/**
 * Return the value of environment variable {@code name}. When it is not set, prints an
 * error to stderr and exits the JVM with status 1.
 */
private static String requiredEnv(String name) {
    String value = System.getenv(name);
    if (value != null) {
        return value;
    }
    System.err.println("Missing required environment variable: " + name);
    System.exit(1);
    return null; // only reached if exit() returns
}
}

View file

@ -0,0 +1,112 @@
package propertymap;
import org.duckdb.DuckDBAppender;
import org.duckdb.DuckDBConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
/**
 * DuckDB-based parquet I/O: loads the postcode and place inputs, writes their reference
 * parquet files, and writes per-origin travel-time parquet files.
 */
public class Parquet {
    /** Parallel arrays describing the loaded postcodes: label, latitude, longitude. */
    record Postcodes(String[] codes, double[] lats, double[] lons) {}

    static {
        // Load the driver class explicitly so DriverManager can resolve "jdbc:duckdb:".
        try { Class.forName("org.duckdb.DuckDBDriver"); }
        catch (ClassNotFoundException e) { throw new RuntimeException(e); }
    }

    /**
     * Load England postcodes, write reference parquet, return codes + flat lat/lon arrays.
     *
     * @param parquetPath input parquet with columns pcds, lat, "long" and ctry
     * @param refOut      where the filtered (pcds, lat, long) table is written
     * @return arrays read back from the filtered table
     */
    static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
        try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
            stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet("
                    + sqlString(parquetPath) + ") WHERE ctry = 'E92000001'");
            copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
            int n = countRows(stmt, "postcodes");
            String[] codes = new String[n];
            double[] lats = new double[n];
            double[] lons = new double[n];
            try (ResultSet data = stmt.executeQuery("SELECT pcds, lat, \"long\" FROM postcodes")) {
                int i = 0;
                while (data.next()) {
                    codes[i] = data.getString(1);
                    lats[i] = data.getDouble(2);
                    lons[i] = data.getDouble(3);
                    i++;
                }
            }
            return new Postcodes(codes, lats, lons);
        }
    }

    /**
     * Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays.
     *
     * @param parquetPath input parquet with at least lat and lon columns
     * @param refOut      where the deduplicated table is written
     * @return {@code [lats, lons]}
     */
    static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
        try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
            // NOTE(review): ROW_NUMBER() has no ORDER BY, so which duplicate is kept, and
            // possibly the row order of this table, may not be stable between runs.
            // Output files are named by row index, so confirm the order is deterministic
            // before relying on resumed runs.
            stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
                    + "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
                    + "FROM read_parquet(" + sqlString(parquetPath) + ")) WHERE rn = 1");
            copyToParquet(stmt, "SELECT * FROM places", refOut);
            int n = countRows(stmt, "places");
            // Return as [lats, lons] flat arrays
            double[] lats = new double[n];
            double[] lons = new double[n];
            try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
                int i = 0;
                while (data.next()) {
                    lats[i] = data.getDouble(1);
                    lons[i] = data.getDouble(2);
                    i++;
                }
            }
            return new double[][]{lats, lons};
        }
    }

    /**
     * Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename).
     *
     * <p>Uses a scratch table named {@code t} on {@code conn}; any existing table of that
     * name is dropped first.
     *
     * @param conn      connection used for the scratch table and the COPY
     * @param outPath   final parquet path; replaced if it already exists
     * @param postcodes postcode labels, one per row
     * @param times     travel minutes, indexed like {@code postcodes}
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times)
            throws Exception {
        if (postcodes.length != times.length) {
            throw new IllegalArgumentException("postcodes/times length mismatch: "
                    + postcodes.length + " vs " + times.length);
        }
        Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
        try {
            try (Statement stmt = conn.createStatement()) {
                stmt.execute("DROP TABLE IF EXISTS t");
                stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT)");
            }
            try (DuckDBAppender appender = conn.createAppender("main", "t")) {
                for (int i = 0; i < postcodes.length; i++) {
                    appender.beginRow();
                    appender.append(postcodes[i]);
                    appender.append(times[i]);
                    appender.endRow();
                }
            }
            try (Statement stmt = conn.createStatement()) {
                stmt.execute("COPY t TO " + sqlString(tmp.toAbsolutePath().toString())
                        + " (FORMAT PARQUET, COMPRESSION ZSTD)");
            }
            Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
        } finally {
            // After a successful move the tmp file is gone and this is a no-op; after a
            // failure it removes the partial file. Cleanup problems must not mask the
            // original exception.
            try { Files.deleteIfExists(tmp); } catch (Exception ignored) { }
        }
    }

    /** Create a new in-memory DuckDB connection (for use as a per-thread reusable connection). */
    static DuckDBConnection connect() throws Exception {
        return (DuckDBConnection) DriverManager.getConnection("jdbc:duckdb:");
    }

    /** Write the result of {@code query} to {@code outPath} as a ZSTD-compressed parquet. */
    private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
        stmt.execute("COPY (" + query + ") TO " + sqlString(outPath.toAbsolutePath().toString())
                + " (FORMAT PARQUET, COMPRESSION ZSTD)");
    }

    /** Return the row count of {@code table}; the name must be a trusted identifier. */
    private static int countRows(Statement stmt, String table) throws Exception {
        // The ResultSet is closed before the caller re-uses the same Statement.
        try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM " + table)) {
            rs.next();
            return rs.getInt(1);
        }
    }

    /** Quote {@code value} as a SQL string literal, doubling embedded single quotes. */
    private static String sqlString(String value) {
        return "'" + value.replace("'", "''") + "'";
    }
}

View file

@ -0,0 +1,211 @@
package propertymap;
import com.conveyal.r5.OneOriginResult;
import com.conveyal.r5.analyst.FreeFormPointSet;
import com.conveyal.r5.analyst.PointSet;
import com.conveyal.r5.analyst.TravelTimeComputer;
import com.conveyal.r5.analyst.WebMercatorExtents;
import com.conveyal.r5.analyst.cluster.RegionalTask;
import com.conveyal.r5.analyst.cluster.TravelTimeResult;
import com.conveyal.r5.api.util.LegMode;
import com.conveyal.r5.api.util.TransitModes;
import com.conveyal.r5.kryo.KryoNetworkSerializer;
import com.conveyal.r5.transit.TransportNetwork;
import org.locationtech.jts.geom.Coordinate;
import java.io.File;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/** R5 routing helpers: loading the network, chunking destinations, computing travel times. */
public class Router {
    private static final int ZOOM = 9;
    private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit

    /**
     * One group of destinations whose bounding grid at {@link #ZOOM} stays within the
     * grid cell limit. {@code originalIndices[i]} is the position, in the full
     * destinations array, of the i-th point of this chunk.
     */
    record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}

    /**
     * Return the transport network, reading {@code network.dat} from {@code cacheDir} when
     * present and otherwise building it from {@code dataDir} and writing the cache.
     * Distance tables are built after either path.
     */
    static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
        System.err.println("Loading transport network...");
        File cached = new File(cacheDir, "network.dat");
        TransportNetwork net;
        if (!cached.exists()) {
            System.err.println("  Building network (first run, takes a few minutes)...");
            net = TransportNetwork.fromDirectory(new File(dataDir));
            new File(cacheDir).mkdirs();
            KryoNetworkSerializer.write(net, cached);
            System.err.println("  Cached to " + cached);
        } else {
            System.err.println("  Loading cached network from " + cached);
            net = KryoNetworkSerializer.read(cached);
        }
        System.err.println("  Building distance tables...");
        net.transitLayer.buildDistanceTables(null);
        System.err.println("  Network ready");
        return net;
    }

    /**
     * Partition destinations into latitude bands, each small enough for the grid cell limit.
     * Points are ordered by latitude and a band is extended until its pixel height would
     * exceed the height allowed by the overall longitude span.
     */
    static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
        int count = lats.length;

        // Index permutation ordered by latitude.
        Integer[] byLat = new Integer[count];
        for (int k = 0; k < count; k++) {
            byLat[k] = k;
        }
        Arrays.sort(byLat, (p, q) -> Double.compare(lats[p], lats[q]));

        // The longitude span over all points fixes the width used for the height budget.
        double lonLow = Double.MAX_VALUE;
        double lonHigh = -Double.MAX_VALUE;
        for (int k = 0; k < lons.length; k++) {
            lonLow = Math.min(lonLow, lons[k]);
            lonHigh = Math.max(lonHigh, lons[k]);
        }
        int worldPixels = 256 << ZOOM;
        int gridWidth = lonToPixel(lonHigh, worldPixels) - lonToPixel(lonLow, worldPixels) + 1;
        int maxHeight = MAX_GRID_CELLS / gridWidth;

        // Greedy banding: every band holds at least one point.
        List<DestinationChunk> result = new ArrayList<>();
        int from = 0;
        while (from < count) {
            int anchorPixel = latToPixel(lats[byLat[from]], worldPixels);
            int to = from + 1;
            for (; to < count; to++) {
                int pixel = latToPixel(lats[byLat[to]], worldPixels);
                if (Math.abs(pixel - anchorPixel) + 1 > maxHeight) {
                    break;
                }
            }
            result.add(buildChunk(lats, lons, byLat, from, to));
            from = to;
        }
        System.err.printf("  Split into %d chunks at zoom %d (grid width %d, max height %d)%n",
                result.size(), ZOOM, gridWidth, maxHeight);
        return result;
    }

    /**
     * Route from one origin to every destination, chunk by chunk.
     *
     * @return one value per destination in original order; -1 where no time was reported
     */
    static short[] computeTravelTimes(
            TransportNetwork network, List<DestinationChunk> chunks,
            double originLat, double originLon, String mode, int nDest, LocalDate date) {
        short[] minutes = new short[nDest];
        Arrays.fill(minutes, (short) -1);
        for (DestinationChunk chunk : chunks) {
            RegionalTask task = buildTask(chunk, originLat, originLon, mode, date);
            OneOriginResult outcome = new TravelTimeComputer(task, network).computeTravelTimes();
            TravelTimeResult perDestination = outcome.travelTimes;
            if (perDestination == null) {
                continue;
            }
            int[][] values = perDestination.getValues();
            int[] indexMap = chunk.originalIndices();
            int pos = 0;
            // Row 0 holds the single requested percentile.
            while (pos < indexMap.length && pos < values[0].length) {
                int value = values[0][pos];
                if (value != Integer.MAX_VALUE) {
                    minutes[indexMap[pos]] = (short) value;
                }
                pos++;
            }
        }
        return minutes;
    }

    /** Build the point set and pixel extents for {@code sorted[start, end)}. */
    private static DestinationChunk buildChunk(
            double[] lats, double[] lons, Integer[] sorted, int start, int end) {
        int size = end - start;
        Coordinate[] points = new Coordinate[size];
        int[] indexMap = new int[size];
        double latLow = Double.MAX_VALUE;
        double latHigh = -Double.MAX_VALUE;
        double lonLow = Double.MAX_VALUE;
        double lonHigh = -Double.MAX_VALUE;
        for (int k = 0; k < size; k++) {
            int source = sorted[start + k];
            double lat = lats[source];
            double lon = lons[source];
            indexMap[k] = source;
            points[k] = new Coordinate(lon, lat); // x=lon, y=lat
            latLow = Math.min(latLow, lat);
            latHigh = Math.max(latHigh, lat);
            lonLow = Math.min(lonLow, lon);
            lonHigh = Math.max(lonHigh, lon);
        }
        int worldPixels = 256 << ZOOM;
        int west = lonToPixel(lonLow, worldPixels);
        int north = latToPixel(latHigh, worldPixels);
        int width = lonToPixel(lonHigh, worldPixels) - west + 1;
        int height = latToPixel(latLow, worldPixels) - north + 1;
        return new DestinationChunk(
                new FreeFormPointSet(points),
                new WebMercatorExtents(west, north, width, height, ZOOM),
                indexMap);
    }

    /** Create the routing task for one origin and one destination chunk. */
    private static RegionalTask buildTask(
            DestinationChunk chunk, double originLat, double originLon, String mode, LocalDate date) {
        RegionalTask task = new RegionalTask();

        // Origin and time settings.
        task.fromLat = originLat;
        task.fromLon = originLon;
        task.date = date;
        task.fromTime = 8 * 3600;
        task.toTime = 8 * 3600 + 60;
        task.maxTripDurationMinutes = 120;

        // Result settings: a single (50th) percentile, with times recorded.
        task.percentiles = new int[]{50};
        task.recordTimes = true;

        // Destinations and the grid extents that enclose them.
        task.destinationPointSets = new PointSet[]{chunk.pointSet()};
        WebMercatorExtents extents = chunk.extents();
        task.zoom = extents.zoom;
        task.west = extents.west;
        task.north = extents.north;
        task.width = extents.width;
        task.height = extents.height;

        configureMode(task, mode);
        return task;
    }

    /**
     * Apply the leg and transit modes for {@code mode} ("car", "bicycle", "walking" or
     * "transit").
     *
     * @throws IllegalArgumentException for any other mode name
     */
    private static void configureMode(RegionalTask task, String mode) {
        // Transit uses walking for its street legs.
        LegMode streetMode = switch (mode) {
            case "car" -> LegMode.CAR;
            case "bicycle" -> LegMode.BICYCLE;
            case "walking", "transit" -> LegMode.WALK;
            default -> throw new IllegalArgumentException("Unknown mode: " + mode);
        };
        task.accessModes = EnumSet.of(streetMode);
        task.egressModes = EnumSet.of(streetMode);
        task.directModes = EnumSet.of(streetMode);
        if (mode.equals("transit")) {
            task.maxRides = 4;
            task.transitModes = EnumSet.of(TransitModes.TRANSIT);
        } else {
            task.transitModes = EnumSet.noneOf(TransitModes.class);
        }
    }

    /** Pixel column of {@code lon} on a world that is {@code totalPixels} wide. */
    private static int lonToPixel(double lon, int totalPixels) {
        double scaled = totalPixels * (lon + 180.0);
        return (int) Math.floor(scaled / 360.0);
    }

    /** Pixel row of {@code lat} on a world that is {@code totalPixels} tall. */
    private static int latToPixel(double lat, int totalPixels) {
        double latRad = Math.toRadians(lat);
        double mercator = Math.log(Math.tan(latRad) + 1.0 / Math.cos(latRad));
        double scaled = totalPixels * (1.0 - mercator / Math.PI);
        return (int) Math.floor(scaled / 2.0);
    }
}