better transit times

This commit is contained in:
Andras Schmelczer 2026-02-22 11:13:39 +00:00
parent 974f005549
commit 205302dbb8
22 changed files with 247 additions and 69 deletions

View file

@ -15,7 +15,7 @@ set -euo pipefail
#
# Usage:
# ./r5-java/run.sh
# ./r5-java/run.sh --threads 8 --heap 24g
# ./r5-java/run.sh --threads 8 --heap 24g --output-dir property-data/travel-times
# --- Defaults ---
THREADS=16
@ -30,6 +30,7 @@ while [[ $# -gt 0 ]]; do
--threads) THREADS="$2"; shift 2 ;;
--heap) HEAP="$2"; shift 2 ;;
--network-dir) NETWORK_DIR="$2"; shift 2 ;;
--output-dir) OUTPUT_BASE="$2"; shift 2 ;;
*) echo "Unknown: $1"; exit 1 ;;
esac
done
@ -75,7 +76,7 @@ fi
if [ ! -f "$DUCKDB_JAR" ]; then
echo "--- Downloading DuckDB JDBC ---"
curl -fL -o "$DUCKDB_JAR" https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.0.0/duckdb_jdbc-1.0.0.jar
curl -fL -o "$DUCKDB_JAR" https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.4.4.0/duckdb_jdbc-1.4.4.0.jar
fi
# --- Step 3: Compile Java source ---
@ -107,8 +108,12 @@ if [ ! -f "$NETWORK_DIR/network.dat" ]; then
BUILD_DIR="$NETWORK_DIR/build"
echo "--- No cached network — copying transit data to build dir ---"
mkdir -p "$BUILD_DIR"
cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null || true
if ! cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .osm.pbf files found in property-data/transit/raw/"
fi
if ! cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .zip files found in property-data/transit/"
fi
DATA_DIR="$BUILD_DIR"
fi

View file

@ -9,6 +9,7 @@ import java.nio.file.Paths;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
@ -24,7 +25,8 @@ import java.util.concurrent.atomic.AtomicInteger;
* postcodes are written (unreachable = absent from file).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT).
* with columns (pcds VARCHAR, travel_minutes SMALLINT). Transit mode additionally
* includes a best_minutes SMALLINT column (5th percentile = best-case departure timing).
*/
public class App {
@ -117,15 +119,14 @@ public class App {
c, total, rate, etaH, failed.get());
}, 2, 2, TimeUnit.SECONDS);
// Submit all work, wait for completion via CountDownLatch-like pattern
java.util.concurrent.CountDownLatch latch = new java.util.concurrent.CountDownLatch(remaining.size());
CountDownLatch latch = new CountDownLatch(remaining.size());
for (int idx : remaining) {
pool.submit(() -> {
try {
processOrigin(network, postcodes, postcodeLats, postcodeLons,
originLats[idx], originLons[idx],
modeDir, mode, date, originNames[idx], threadConn.get());
modeDir, mode, date, idx, originNames[idx], threadConn.get());
completed.incrementAndGet();
} catch (Exception e) {
failed.incrementAndGet();
@ -138,6 +139,7 @@ public class App {
latch.await();
reporter.shutdown();
reporter.awaitTermination(5, TimeUnit.SECONDS);
double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
int n = completed.get();
@ -150,10 +152,10 @@ public class App {
TransportNetwork network,
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
double originLat, double originLon,
Path modeDir, String mode, LocalDate date, String name,
Path modeDir, String mode, LocalDate date, int index, String name,
DuckDBConnection conn) throws Exception {
Path outPath = modeDir.resolve(sanitizeFilename(name) + ".parquet");
Path outPath = modeDir.resolve(originFilename(index, name));
Exception lastError = null;
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
@ -168,16 +170,22 @@ public class App {
String[] codes = new String[reachable];
short[] times = new short[reachable];
short[] bestTimes = result.bestTimes() != null ? new short[reachable] : null;
int j = 0;
for (int i = 0; i < result.times().length; i++) {
if (result.times()[i] >= 0) {
codes[j] = postcodes[result.originalIndices()[i]];
times[j] = result.times()[i];
if (bestTimes != null) bestTimes[j] = result.bestTimes()[i];
j++;
}
}
Parquet.writeTravelTimes(conn, outPath, codes, times);
if (bestTimes != null) {
Parquet.writeTransitTravelTimes(conn, outPath, codes, times, bestTimes);
} else {
Parquet.writeTravelTimes(conn, outPath, codes, times);
}
return;
} catch (Exception e) {
lastError = e;
@ -194,7 +202,7 @@ public class App {
private static List<Integer> findRemaining(Path modeDir, String[] names) throws Exception {
List<Integer> remaining = new ArrayList<>();
for (int i = 0; i < names.length; i++) {
Path f = modeDir.resolve(sanitizeFilename(names[i]) + ".parquet");
Path f = modeDir.resolve(originFilename(i, names[i]));
if (!Files.exists(f) || Files.size(f) == 0) {
remaining.add(i);
}
@ -202,11 +210,12 @@ public class App {
return remaining;
}
/** Sanitize a place name into a safe filename (lowercase, spaces to hyphens, strip non-alphanumeric). */
private static String sanitizeFilename(String name) {
return name.toLowerCase()
/** Build a filename from index + place name (index prefix prevents collisions after sanitization). */
private static String originFilename(int index, String name) {
String safe = name.toLowerCase()
.replaceAll("[^a-z0-9 -]", "")
.replaceAll("\\s+", "-");
return String.format("%04d-%s.parquet", index, safe);
}
private static String requiredArg(String[] args, String name) {

View file

@ -23,11 +23,16 @@ public class Parquet {
catch (ClassNotFoundException e) { throw new RuntimeException(e); }
}
/** Escape a file path for safe interpolation into DuckDB SQL (double single quotes). */
private static String escapePath(String path) {
return path.replace("'", "''");
}
/** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
+ escapePath(parquetPath) + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
@ -56,7 +61,7 @@ public class Parquet {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
+ "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1");
+ "FROM read_parquet('" + escapePath(parquetPath) + "')) WHERE rn = 1");
copyToParquet(stmt, "SELECT * FROM places", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
@ -97,7 +102,30 @@ public class Parquet {
}
}
try (Statement stmt = conn.createStatement()) {
stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
}
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
}
/** Write transit travel times with both median and best-case columns. */
static void writeTransitTravelTimes(DuckDBConnection conn, Path outPath,
String[] postcodes, short[] times, short[] bestTimes) throws Exception {
Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
try (Statement stmt = conn.createStatement()) {
stmt.execute("DROP TABLE IF EXISTS t");
stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT, best_minutes SMALLINT)");
}
try (DuckDBAppender appender = conn.createAppender("main", "t")) {
for (int i = 0; i < postcodes.length; i++) {
appender.beginRow();
appender.append(postcodes[i]);
appender.append(times[i]);
appender.append(bestTimes[i]);
appender.endRow();
}
}
try (Statement stmt = conn.createStatement()) {
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
}
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
}
@ -108,7 +136,7 @@ public class Parquet {
}
private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath()
stmt.execute("COPY (" + query + ") TO '" + escapePath(outPath.toAbsolutePath().toString())
+ "' (FORMAT PARQUET, COMPRESSION ZSTD)");
}
}

View file

@ -25,11 +25,14 @@ public class Router {
private static final int ZOOM = 9; // R5 enforces range 9-12
private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
private static final int DEPARTURE_FROM_TIME = 7 * 3600; // 07:00
private static final int DEPARTURE_TO_TIME = 9 * 3600; // 09:00
private static final int MAX_TRIP_DURATION_MINUTES = 120;
/** Result of computing travel times for a single origin with spatial pre-filtering. */
record FilteredResult(int[] originalIndices, short[] times) {}
record FilteredResult(int[] originalIndices, short[] times, short[] bestTimes) {}
/** Max plausible travel radius in km for 120-minute trips. */
/** Max plausible travel radius in km for {@link #MAX_TRIP_DURATION_MINUTES}-minute trips. */
static double maxRadiusKm(String mode) {
return switch (mode) {
case "car" -> 150;
@ -40,7 +43,10 @@ public class Router {
};
}
/** Load or build the transport network with Kryo caching. */
/**
* Load or build the transport network with Kryo caching.
* The returned network is read-only after buildDistanceTables safe for concurrent use.
*/
static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
System.err.println("Loading transport network...");
File cacheFile = new File(cacheDir, "network.dat");
@ -78,7 +84,7 @@ public class Router {
// 1. Filter destinations by bounding box
int[] filtered = filterByDistance(allLats, allLons, originLat, originLon, maxRadius);
if (filtered.length == 0) {
return new FilteredResult(new int[0], new short[0]);
return new FilteredResult(new int[0], new short[0], null);
}
// 2. Extract filtered coordinate arrays
@ -93,9 +99,14 @@ public class Router {
List<DestinationChunk> chunks = buildDestinationChunks(fLats, fLons);
// 4. Compute travel times
short[] times = computeTravelTimes(network, chunks, originLat, originLon, mode, fLats.length, date);
boolean isTransit = mode.equals("transit");
short[][] allTimes = computeTravelTimes(network, chunks, originLat, originLon, mode, fLats.length, date);
return new FilteredResult(filtered, times);
// For transit: allTimes[0]=best (5th percentile), allTimes[1]=median (50th)
// For others: allTimes[0]=median (50th), no best
short[] medianTimes = isTransit ? allTimes[1] : allTimes[0];
short[] bestTimes = isTransit ? allTimes[0] : null;
return new FilteredResult(filtered, medianTimes, bestTimes);
}
/**
@ -175,13 +186,18 @@ public class Router {
return chunks;
}
/** Compute travel times from one origin to all destinations across all chunks. */
private static short[] computeTravelTimes(
/**
* Compute travel times from one origin to all destinations across all chunks.
* Returns one short[] per requested percentile (transit gets 2: best + median, others get 1: median).
*/
private static short[][] computeTravelTimes(
TransportNetwork network, List<DestinationChunk> chunks,
double originLat, double originLon, String mode, int nDest, LocalDate date) {
short[] times = new short[nDest];
Arrays.fill(times, (short) -1);
boolean isTransit = mode.equals("transit");
int nPercentiles = isTransit ? 2 : 1;
short[][] allTimes = new short[nPercentiles][nDest];
for (short[] arr : allTimes) Arrays.fill(arr, (short) -1);
for (DestinationChunk chunk : chunks) {
RegionalTask task = buildTask(chunk, originLat, originLon, mode, date);
@ -191,14 +207,16 @@ public class Router {
TravelTimeResult tt = result.travelTimes;
if (tt != null) {
int[][] values = tt.getValues();
for (int i = 0; i < chunk.originalIndices.length && i < values[0].length; i++) {
if (values[0][i] != Integer.MAX_VALUE) {
times[chunk.originalIndices[i]] = (short) values[0][i];
for (int p = 0; p < nPercentiles && p < values.length; p++) {
for (int i = 0; i < chunk.originalIndices.length && i < values[p].length; i++) {
if (values[p][i] != Integer.MAX_VALUE) {
allTimes[p][chunk.originalIndices[i]] = (short) values[p][i];
}
}
}
}
}
return times;
return allTimes;
}
// --- Private helpers ---
@ -241,7 +259,7 @@ public class Router {
task.fromLat = originLat;
task.fromLon = originLon;
task.date = date;
task.percentiles = new int[]{50};
task.percentiles = mode.equals("transit") ? new int[]{5, 50} : new int[]{50};
task.recordTimes = true;
task.destinationPointSets = new PointSet[]{chunk.pointSet};
task.zoom = chunk.extents.zoom;
@ -249,9 +267,9 @@ public class Router {
task.north = chunk.extents.north;
task.width = chunk.extents.width;
task.height = chunk.extents.height;
task.fromTime = 8 * 3600;
task.toTime = 8 * 3600 + 60;
task.maxTripDurationMinutes = 120;
task.fromTime = DEPARTURE_FROM_TIME;
task.toTime = DEPARTURE_TO_TIME;
task.maxTripDurationMinutes = MAX_TRIP_DURATION_MINUTES;
configureMode(task, mode);
return task;
@ -267,13 +285,14 @@ public class Router {
task.accessModes = EnumSet.of(LegMode.WALK);
task.egressModes = EnumSet.of(LegMode.WALK);
task.directModes = EnumSet.of(LegMode.WALK);
task.transitModes = EnumSet.of(TransitModes.TRANSIT);
task.transitModes = EnumSet.allOf(TransitModes.class);
}
default -> throw new IllegalArgumentException("Unknown mode: " + mode);
}
}
private static void setDirectMode(RegionalTask task, LegMode legMode) {
task.maxRides = 0;
task.accessModes = EnumSet.of(legMode);
task.egressModes = EnumSet.of(legMode);
task.directModes = EnumSet.of(legMode);