This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@ -14,13 +14,12 @@ set -euo pipefail
# - places_ref.parquet: place order reference
#
# Usage:
# ./r5-java/run.sh # 4 threads, 16g heap
# ./r5-java/run.sh --threads 8
# ./r5-java/run.sh --heap 24g
# ./r5-java/run.sh
# ./r5-java/run.sh --threads 8 --heap 24g
# --- Defaults ---
THREADS=28
HEAP=40g
THREADS=16
HEAP=16g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java
@ -125,5 +124,5 @@ java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
echo ""
echo "=== Complete ==="
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/"
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/{place-name}.parquet"
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"

View file

@ -16,15 +16,19 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Batch-compute travel times from each origin (place) to all destinations (postcodes)
* Batch-compute travel times from each origin (place) to nearby postcodes
* for all transport modes (car, bicycle, walking, transit).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
* Each origin is spatially pre-filtered to only route to postcodes within a
* plausible travel radius for the mode. Output is sparse: only reachable
* postcodes are written (unreachable = absent from file).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT).
*/
public class App {
private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
private static final String[] MODES = {"bicycle", "walking", "transit", "car"};
private static final int MAX_RETRIES = 2;
public static void main(String[] args) throws Exception {
@ -42,18 +46,14 @@ public class App {
System.err.println("Loading postcodes (England only)...");
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
int nDest = postcodes.lats().length;
System.err.printf(" %,d postcodes%n", nDest);
List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());
System.err.printf(" %,d postcodes%n", postcodes.lats().length);
System.err.println("Loading places (deduplicated)...");
double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
String[] originNames = places.names();
double[] originLats = places.lats(), originLons = places.lons();
int nOrigins = originLats.length;
System.err.printf(" %,d places%n", nOrigins);
System.err.printf(" Estimated output: %.1f GB (%,d x %,d x 2B)%n",
(double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);
// One thread pool shared across all modes
ExecutorService pool = Executors.newFixedThreadPool(threads);
@ -65,8 +65,8 @@ public class App {
try {
for (String mode : MODES) {
processMode(network, chunks, postcodes.codes(), originLats, originLons,
nDest, outDir, mode, today, pool, threadConn);
processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(),
originNames, originLats, originLons, outDir, mode, today, pool, threadConn);
}
} finally {
pool.shutdown();
@ -75,17 +75,19 @@ public class App {
}
private static void processMode(
TransportNetwork network, List<Router.DestinationChunk> chunks,
String[] postcodes, double[] originLats, double[] originLons, int nDest,
TransportNetwork network,
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
String[] originNames, double[] originLats, double[] originLons,
Path outDir, String mode, LocalDate date,
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
int nOrigins = originLats.length;
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode));
Path modeDir = outDir.resolve(mode);
Files.createDirectories(modeDir);
List<Integer> remaining = findRemaining(modeDir, nOrigins);
List<Integer> remaining = findRemaining(modeDir, originNames);
int alreadyDone = nOrigins - remaining.size();
System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size());
@ -121,12 +123,13 @@ public class App {
for (int idx : remaining) {
pool.submit(() -> {
try {
processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
nDest, modeDir, mode, date, idx, threadConn.get());
processOrigin(network, postcodes, postcodeLats, postcodeLons,
originLats[idx], originLons[idx],
modeDir, mode, date, originNames[idx], threadConn.get());
completed.incrementAndGet();
} catch (Exception e) {
failed.incrementAndGet();
System.err.printf("%n [FAIL] origin %d: %s%n", idx, e.getMessage());
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage());
} finally {
latch.countDown();
}
@ -144,24 +147,43 @@ public class App {
/** Compute and write travel times for a single origin, with retry on failure. */
private static void processOrigin(
TransportNetwork network, List<Router.DestinationChunk> chunks,
String[] postcodes, double lat, double lon, int nDest,
Path modeDir, String mode, LocalDate date, int idx,
TransportNetwork network,
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
double originLat, double originLon,
Path modeDir, String mode, LocalDate date, String name,
DuckDBConnection conn) throws Exception {
Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
Path outPath = modeDir.resolve(sanitizeFilename(name) + ".parquet");
Exception lastError = null;
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
Parquet.writeTravelTimes(conn, outPath, postcodes, times);
Router.FilteredResult result = Router.computeForOrigin(
network, postcodeLats, postcodeLons,
originLat, originLon, mode, date);
// Write only reachable postcodes (sparse output)
int reachable = 0;
for (short t : result.times()) if (t >= 0) reachable++;
String[] codes = new String[reachable];
short[] times = new short[reachable];
int j = 0;
for (int i = 0; i < result.times().length; i++) {
if (result.times()[i] >= 0) {
codes[j] = postcodes[result.originalIndices()[i]];
times[j] = result.times()[i];
j++;
}
}
Parquet.writeTravelTimes(conn, outPath, codes, times);
return;
} catch (Exception e) {
lastError = e;
if (attempt < MAX_RETRIES) {
System.err.printf("%n [RETRY %d/%d] origin %d: %s%n",
attempt + 1, MAX_RETRIES, idx, e.getMessage());
System.err.printf("%n [RETRY %d/%d] %s: %s%n",
attempt + 1, MAX_RETRIES, name, e.getMessage());
}
}
}
@ -169,10 +191,10 @@ public class App {
}
/** Find origin indices that don't yet have output parquet files. */
private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
private static List<Integer> findRemaining(Path modeDir, String[] names) throws Exception {
List<Integer> remaining = new ArrayList<>();
for (int i = 0; i < nOrigins; i++) {
Path f = modeDir.resolve(String.format("%06d.parquet", i));
for (int i = 0; i < names.length; i++) {
Path f = modeDir.resolve(sanitizeFilename(names[i]) + ".parquet");
if (!Files.exists(f) || Files.size(f) == 0) {
remaining.add(i);
}
@ -180,6 +202,13 @@ public class App {
return remaining;
}
/**
 * Sanitize a place name into a safe filename stem.
 *
 * <p>Steps, in order: lowercase the name; drop every character that is not
 * {@code a-z}, {@code 0-9}, a space or a hyphen; replace each run of spaces with a
 * single hyphen. Tabs and newlines are removed by the second step rather than
 * turned into hyphens.
 *
 * <p>Lowercasing uses {@link java.util.Locale#ROOT} so the result does not depend on
 * the JVM default locale. With a Turkish default locale, for example, {@code "I"}
 * would lowercase to a dotless i and then be stripped, changing the filename and
 * defeating the resume check that looks for existing output files.
 *
 * <p>NOTE(review): distinct names can map to the same stem (for example names that
 * differ only in punctuation or case), and a name with no ASCII letters or digits
 * maps to an empty stem. Callers that need unique files per origin should confirm
 * the input names are unique after sanitizing.
 *
 * @param name the place name; must not be null
 * @return the sanitized filename stem, without extension
 */
private static String sanitizeFilename(String name) {
    return name.toLowerCase(java.util.Locale.ROOT)
            .replaceAll("[^a-z0-9 -]", "")
            .replaceAll("\\s+", "-");
}
private static String requiredArg(String[] args, String name) {
for (int i = 0; i < args.length - 1; i++) {
if (args[i].equals(name)) return args[i + 1];

View file

@ -16,6 +16,7 @@ import java.util.List;
public class Parquet {
record Postcodes(String[] codes, double[] lats, double[] lons) {}
record Places(String[] names, double[] lats, double[] lons) {}
static {
try { Class.forName("org.duckdb.DuckDBDriver"); }
@ -26,7 +27,7 @@ public class Parquet {
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ parquetPath + "') WHERE ctry = 'E92000001'");
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
@ -50,8 +51,8 @@ public class Parquet {
}
}
/** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */
static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
/** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */
static Places loadPlaces(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
@ -61,19 +62,20 @@ public class Parquet {
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
rs.next();
int n = rs.getInt(1);
// Return as [lats, lons] flat arrays
String[] names = new String[n];
double[] lats = new double[n];
double[] lons = new double[n];
try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
try (ResultSet data = stmt.executeQuery("SELECT name, lat, lon FROM places")) {
int i = 0;
while (data.next()) {
lats[i] = data.getDouble(1);
lons[i] = data.getDouble(2);
names[i] = data.getString(1);
lats[i] = data.getDouble(2);
lons[i] = data.getDouble(3);
i++;
}
}
return new double[][]{lats, lons};
return new Places(names, lats, lons);
}
}
}

View file

@ -20,17 +20,25 @@ import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/** R5 routing: network loading, point set construction, travel time computation. */
/** R5 routing: network loading, spatial filtering, travel time computation. */
public class Router {
private static final int ZOOM = 9;
private static final int ZOOM = 9; // R5 enforces range 9-12
private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
/**
* A chunk of destinations that fits within R5's grid cell limit at zoom 9.
* originalIndices maps each position in this chunk back to the full destinations array.
*/
record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
/**
 * Result of computing travel times for a single origin with spatial pre-filtering.
 *
 * <p>The two arrays are parallel: {@code times[i]} is the travel time for the
 * destination whose position in the full (unfiltered) destination arrays is
 * {@code originalIndices[i]}. Both are empty when no destination passed the filter.
 *
 * @param originalIndices positions in the full destination arrays of the destinations
 *                        that survived the spatial filter
 * @param times           travel time per filtered destination; callers treat negative
 *                        values as unreachable (presumably minutes, TODO confirm unit
 *                        against computeTravelTimes)
 */
record FilteredResult(int[] originalIndices, short[] times) {}
/**
 * Upper bound, in kilometres, on how far a 120-minute trip can plausibly reach
 * for the given transport mode.
 *
 * @param mode one of "car", "transit", "bicycle" or "walking"
 * @return the radius in km used to pre-filter destinations for that mode
 * @throws IllegalArgumentException if the mode is not one of the four known values
 */
static double maxRadiusKm(String mode) {
    switch (mode) {
        case "car":
        case "transit":
            return 150;
        case "bicycle":
            return 60;
        case "walking":
            return 12;
        default:
            throw new IllegalArgumentException("Unknown mode: " + mode);
    }
}
/** Load or build the transport network with Kryo caching. */
static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
@ -56,10 +64,80 @@ public class Router {
}
/**
* Split destinations into geographic chunks that each fit within R5's grid cell limit.
* Sorts by latitude and splits into bands so each band's bounding box at zoom 9 is under 5M cells.
* Filter destinations by distance, build chunks, compute travel times for one origin.
* Returns only the filtered subset indices and their travel times.
*/
static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
static FilteredResult computeForOrigin(
        TransportNetwork network,
        double[] allLats, double[] allLons,
        double originLat, double originLon,
        String mode, LocalDate date) {
    // Candidate destinations: those inside the mode's bounding box around the origin.
    double radiusKm = maxRadiusKm(mode);
    int[] candidates = filterByDistance(allLats, allLons, originLat, originLon, radiusKm);
    int kept = candidates.length;
    if (kept == 0) {
        // Nothing in range: skip chunking and routing entirely.
        return new FilteredResult(new int[0], new short[0]);
    }

    // Gather the candidates' coordinates into dense arrays, in candidate order,
    // so position p here corresponds to candidates[p] in the full arrays.
    double[] keptLats = new double[kept];
    double[] keptLons = new double[kept];
    int p = 0;
    for (int source : candidates) {
        keptLats[p] = allLats[source];
        keptLons[p] = allLons[source];
        p++;
    }

    // Chunk the reduced destination set, then route from the origin to all of it.
    List<DestinationChunk> chunks = buildDestinationChunks(keptLats, keptLons);
    short[] travelTimes = computeTravelTimes(
            network, chunks, originLat, originLon, mode, kept, date);

    return new FilteredResult(candidates, travelTimes);
}
/**
 * Select the indices of destinations that fall inside a lat/lon bounding box
 * reaching maxRadiusKm from the origin in each direction.
 *
 * <p>The box is sized with a degree-based approximation: 111 km per degree of
 * latitude, with longitude scaled by the cosine of the origin latitude. A box
 * slightly overestimates the radius at its corners, which is fine for a pre-filter.
 *
 * @return indices into lats/lons, in ascending order, of the points inside the box
 */
private static int[] filterByDistance(
        double[] lats, double[] lons,
        double originLat, double originLon,
        double maxRadiusKm) {
    double latSpan = maxRadiusKm / 111.0;
    double lonSpan = maxRadiusKm / (111.0 * Math.cos(Math.toRadians(originLat)));

    double south = originLat - latSpan;
    double north = originLat + latSpan;
    double west = originLon - lonSpan;
    double east = originLon + lonSpan;

    // Pass 1: count matches so the result array can be sized exactly
    // (no ArrayList, no boxing).
    int matches = 0;
    for (int k = 0; k < lats.length; k++) {
        if (!(lats[k] >= south && lats[k] <= north)) {
            continue;
        }
        if (!(lons[k] >= west && lons[k] <= east)) {
            continue;
        }
        matches++;
    }

    // Pass 2: record the matching indices.
    int[] inside = new int[matches];
    int next = 0;
    for (int k = 0; k < lats.length; k++) {
        if (!(lats[k] >= south && lats[k] <= north)) {
            continue;
        }
        if (!(lons[k] >= west && lons[k] <= east)) {
            continue;
        }
        inside[next] = k;
        next++;
    }
    return inside;
}
/**
* Split destinations into geographic chunks that each fit within R5's grid cell limit.
* Sorts by latitude and splits into bands so each band's bounding box is under 5M cells.
*/
private static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
int n = lats.length;
// Sort indices by latitude for geographic chunking
@ -94,13 +172,11 @@ public class Router {
start = end;
}
System.err.printf(" Split into %d chunks at zoom %d (grid width %d, max height %d)%n",
chunks.size(), ZOOM, gridWidth, maxHeight);
return chunks;
}
/** Compute travel times from one origin to all destinations across all chunks. */
static short[] computeTravelTimes(
private static short[] computeTravelTimes(
TransportNetwork network, List<DestinationChunk> chunks,
double originLat, double originLon, String mode, int nDest, LocalDate date) {
@ -125,6 +201,10 @@ public class Router {
return times;
}
// --- Private helpers ---
private record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
private static DestinationChunk buildChunk(
double[] lats, double[] lons, Integer[] sorted, int start, int end) {
int size = end - start;