has issues
This commit is contained in:
parent
2e112d7398
commit
c645b0f1d4
96 changed files with 2147083 additions and 5787 deletions
|
|
@ -21,8 +21,13 @@ set -euo pipefail
|
|||
# --demo only compute Bank + TCR, transit only (quick test)
|
||||
|
||||
# --- Defaults ---
|
||||
THREADS=6
|
||||
HEAP=40g
|
||||
THREADS=12
|
||||
# The execution cgroup caps process memory at 48 GB (see /sys/fs/cgroup/memory.max);
|
||||
# the nominal "64 GB" host total is not all addressable to one process. 28g heap +
|
||||
# ~15g native overhead (DuckDB JNI, R5 mapdb, Kryo deserialize, RAPTOR scratch)
|
||||
# leaves ~5g cgroup headroom — empirically the safe ceiling before SIGKILL.
|
||||
# Under 32g also keeps CompressedOops on, halving R5's reference-heavy footprint.
|
||||
HEAP=28g
|
||||
NETWORK_DIR=property-data/r5-network
|
||||
OUTPUT_BASE=property-data/travel-times
|
||||
R5_DIR=r5-java
|
||||
|
|
@ -36,6 +41,7 @@ while [[ $# -gt 0 ]]; do
|
|||
--network-dir) NETWORK_DIR="$2"; shift 2 ;;
|
||||
--output-dir) OUTPUT_BASE="$2"; shift 2 ;;
|
||||
--demo) DEMO_FLAG="--demo"; shift ;;
|
||||
--demo-cars=*) DEMO_FLAG="--demo-cars ${1#--demo-cars=}"; shift ;;
|
||||
*) echo "Unknown: $1"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
|
@ -147,7 +153,14 @@ mkdir -p "$TMP_DIR"
|
|||
echo ""
|
||||
echo "--- Starting batch computation ---"
|
||||
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
|
||||
java -Xms"$HEAP" -Xmx"$HEAP" -Djava.io.tmpdir="$TMP_DIR" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
||||
java -Xmx"$HEAP" \
|
||||
-XX:+UseParallelGC -XX:ParallelGCThreads=12 \
|
||||
-XX:+UseTransparentHugePages \
|
||||
-XX:NewRatio=2 \
|
||||
-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath="$TMP_DIR/heapdump-$(date +%s).hprof" \
|
||||
-XX:+ExitOnOutOfMemoryError \
|
||||
-Xlog:gc*:file="$TMP_DIR/gc.log":time,uptime:filecount=5,filesize=20M \
|
||||
-Djava.io.tmpdir="$TMP_DIR" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
||||
--postcodes property-data/arcgis_data.parquet \
|
||||
--places property-data/places.parquet \
|
||||
--output-dir "$OUTPUT_BASE" \
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package propertymap;
|
|||
|
||||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import org.duckdb.DuckDBConnection;
|
||||
import org.locationtech.jts.index.strtree.STRtree;
|
||||
import propertymap.Router.PostcodeTile;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
|
|
@ -10,9 +12,12 @@ import java.nio.file.Path;
|
|||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
|
@ -32,15 +37,84 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
* with columns (pcds VARCHAR, travel_minutes SMALLINT). Transit mode additionally
|
||||
* includes a best_minutes SMALLINT column (5th percentile = best-case departure timing)
|
||||
* and a journey VARCHAR column with JSON leg instructions.
|
||||
*
|
||||
* Concurrency model:
|
||||
* - One per-mode routing thread pool, capped per mode by memory footprint (transit
|
||||
* and car lowest, bicycle higher; only walking uses the full --threads value).
|
||||
* - A process-lifetime writer pool of {@link #WRITER_THREADS} threads, each holding
|
||||
* its own DuckDB connection. Routing tasks enqueue {@link WriteJob}s onto a
|
||||
* bounded {@link ArrayBlockingQueue}; if writes lag, the queue applies backpressure
|
||||
* to routing.
|
||||
* - Within a mode, the latch counts down only after the write completes — so progress
|
||||
* and the inter-mode barrier reflect fully-persisted work.
|
||||
*/
|
||||
public class App {
|
||||
|
||||
private static final String[] MODES = {"bicycle", "transit", "walking", "car"};
|
||||
private static final String[] DEMO_MODES = {"transit"};
|
||||
private static final String[] MODES = {
|
||||
"bicycle", "walking", "car",
|
||||
"transit",
|
||||
"transit-no-bus",
|
||||
"transit-no-change",
|
||||
"transit-no-change-no-bus",
|
||||
"transit-one-change",
|
||||
"transit-one-change-no-bus",
|
||||
};
|
||||
private static final String[] DEMO_MODES = {"transit", "car", "bicycle", "walking"};
|
||||
private static final Set<String> DEMO_PLACES = Set.of(
|
||||
"Bank tube station", "Tottenham Court Road tube station");
|
||||
/**
|
||||
* Always-first origins (per-mode). The cache-warmest London core: these origins
|
||||
* each touch ~100 unique global tiles, so running them up front builds the
|
||||
* LinkageCache that every subsequent London-ish origin reuses.
|
||||
* Order within this list is preserved in submission order.
|
||||
*/
|
||||
private static final List<String> PRIORITY_PLACES = List.of(
|
||||
"Bank tube station",
|
||||
"Tower Gateway DLR station",
|
||||
"Tottenham Court Road tube station");
|
||||
private static final int MAX_RETRIES = 2;
|
||||
|
||||
/** Writer pool size. Holds one DuckDB connection per thread. */
|
||||
private static final int WRITER_THREADS = 4;
|
||||
|
||||
/**
|
||||
* Per-mode worker concurrency caps. Memory, not CPU, is the binding constraint
|
||||
* for long-radius modes: each origin allocates a per-origin FreeFormPointSet +
|
||||
* LinkedPointSet sized to the filtered dest count, plus R5's transient routing
|
||||
* state (StreetRouter cost arrays for the reachable street area). London car
|
||||
* origins filter to ~1M postcodes within 150km, so each in-flight car task is
|
||||
* ~500MB-1GB of state. 12 concurrent of those OOM the 28g heap.
|
||||
*
|
||||
* Tuned to the 28g heap ceiling:
|
||||
* transit → 4 (tile-cached but path recording allocates heavily per task)
|
||||
* car → 4 (~1M dests per origin, 150km radius)
|
||||
* bicycle → 8 (~250k dests per origin, 60km radius)
|
||||
* walking → full (~few k dests per origin, 12km radius)
|
||||
*/
|
||||
private static final int MAX_TRANSIT_THREADS = 4;
|
||||
private static final int MAX_CAR_THREADS = 4;
|
||||
private static final int MAX_BICYCLE_THREADS = 8;
|
||||
|
||||
/** Sentinel enqueued at shutdown to wake writer threads. */
|
||||
private static final WriteJob POISON = new WriteJob(null, null, null, null, null, null, null, null, null);
|
||||
|
||||
/**
|
||||
* A flattened result ready to be persisted. {@code bestTimes}/{@code journeys}
|
||||
* are non-null only for transit. Writers increment {@code completed} or
|
||||
* {@code failed}, then count down {@code latch}, when done.
|
||||
*/
|
||||
record WriteJob(
|
||||
Path outPath,
|
||||
String[] codes,
|
||||
short[] times,
|
||||
short[] bestTimes,
|
||||
String[] journeys,
|
||||
String originName,
|
||||
AtomicInteger completed,
|
||||
AtomicInteger failed,
|
||||
CountDownLatch latch
|
||||
) {}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String postcodesPath = requiredArg(args, "--postcodes");
|
||||
String placesPath = requiredArg(args, "--places");
|
||||
|
|
@ -61,6 +135,16 @@ public class App {
|
|||
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
|
||||
System.err.printf(" %,d postcodes%n", postcodes.lats().length);
|
||||
|
||||
System.err.println("Building STRtree spatial index over postcodes...");
|
||||
STRtree postcodeIndex = Router.buildPostcodeIndex(postcodes.lats(), postcodes.lons());
|
||||
|
||||
System.err.println("Building global transit tiles (shared FreeFormPointSets)...");
|
||||
List<PostcodeTile> transitTiles = Router.buildGlobalTransitTiles(postcodes.lats(), postcodes.lons());
|
||||
System.err.printf(" %,d tiles (max %d dests each)%n",
|
||||
transitTiles.size(),
|
||||
transitTiles.stream().mapToInt(t -> t.originalIndices().length).max().orElse(0));
|
||||
Router.preloadTransitTileLinkages(network, transitTiles);
|
||||
|
||||
System.err.println("Loading places (deduplicated)...");
|
||||
Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
|
||||
String[] originNames = places.names();
|
||||
|
|
@ -80,7 +164,24 @@ public class App {
|
|||
// Demo runs restrict the origin set: --demo-cars takes the top-N England origins (car only); plain --demo filters to DEMO_PLACES
|
||||
int[] originIndices;
|
||||
String[] modes;
|
||||
if (demo) {
|
||||
int demoCars = Integer.parseInt(optionalArg(args, "--demo-cars", "0"));
|
||||
if (demoCars > 0) {
|
||||
// Leak-test path: run car mode only on the top-N England origins by
|
||||
// workload (≈ postcode count within 150km). London-area origins go
|
||||
// first, exercising the heaviest per-origin allocations.
|
||||
Integer[] englandArr = englandIndices.toArray(new Integer[0]);
|
||||
java.util.Arrays.sort(englandArr, (a, b) -> Integer.compare(
|
||||
Router.estimateWorkload(postcodeIndex, originLats[b], originLons[b], 150),
|
||||
Router.estimateWorkload(postcodeIndex, originLats[a], originLons[a], 150)));
|
||||
originIndices = java.util.Arrays.stream(englandArr).limit(demoCars).mapToInt(Integer::intValue).toArray();
|
||||
modes = new String[]{"car"};
|
||||
System.err.printf("DEMO-CARS MODE: %d origins (car only, LPT-ordered)%n", originIndices.length);
|
||||
for (int i = 0; i < Math.min(5, originIndices.length); i++) {
|
||||
System.err.printf(" - %s (workload ~%d)%n", originNames[originIndices[i]],
|
||||
Router.estimateWorkload(postcodeIndex, originLats[originIndices[i]], originLons[originIndices[i]], 150));
|
||||
}
|
||||
if (originIndices.length > 5) System.err.printf(" ... and %d more%n", originIndices.length - 5);
|
||||
} else if (demo) {
|
||||
List<Integer> demoIdx = new ArrayList<>();
|
||||
for (int i = 0; i < nOrigins; i++) {
|
||||
if (DEMO_PLACES.contains(originNames[i])) demoIdx.add(i);
|
||||
|
|
@ -96,25 +197,96 @@ public class App {
|
|||
modes = MODES;
|
||||
}
|
||||
|
||||
// One thread pool shared across all modes
|
||||
ExecutorService pool = Executors.newFixedThreadPool(threads);
|
||||
// One DuckDB connection per thread, reused across all writes
|
||||
ThreadLocal<DuckDBConnection> threadConn = ThreadLocal.withInitial(() -> {
|
||||
try { return Parquet.connect(); }
|
||||
catch (Exception e) { throw new RuntimeException(e); }
|
||||
});
|
||||
|
||||
if (enablePaths) System.err.println("Path recording ENABLED (transit only, ~20x slower)");
|
||||
|
||||
// Writer pool (lives across all modes). Bounded queue applies backpressure
|
||||
// to routing workers when writes lag behind.
|
||||
BlockingQueue<WriteJob> writeQueue = new ArrayBlockingQueue<>(Math.max(threads * 2, 16));
|
||||
ExecutorService writerPool = Executors.newFixedThreadPool(WRITER_THREADS, r -> {
|
||||
Thread t = new Thread(r, "parquet-writer");
|
||||
t.setDaemon(true);
|
||||
return t;
|
||||
});
|
||||
for (int i = 0; i < WRITER_THREADS; i++) {
|
||||
writerPool.submit(() -> writerLoop(writeQueue));
|
||||
}
|
||||
|
||||
boolean skipCompleted = !demo && demoCars == 0;
|
||||
try {
|
||||
for (String mode : modes) {
|
||||
processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(),
|
||||
originNames, originLats, originLons, outDir, mode, today, pool, threadConn, enablePaths,
|
||||
originIndices, !demo);
|
||||
int modeThreads = threadsForMode(mode, threads);
|
||||
processMode(network, postcodeIndex, transitTiles,
|
||||
postcodes.codes(), postcodes.lats(), postcodes.lons(),
|
||||
originNames, originLats, originLons, outDir, mode, today,
|
||||
modeThreads, writeQueue, enablePaths, originIndices, skipCompleted);
|
||||
}
|
||||
} finally {
|
||||
pool.shutdown();
|
||||
pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
|
||||
// Wake every writer with a poison pill, then shut down.
|
||||
for (int i = 0; i < WRITER_THREADS; i++) {
|
||||
writeQueue.put(POISON);
|
||||
}
|
||||
writerPool.shutdown();
|
||||
writerPool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writer thread main loop: pull jobs, write parquet, count down latch.
|
||||
* Each writer owns one long-lived DuckDB connection.
|
||||
*/
|
||||
private static void writerLoop(BlockingQueue<WriteJob> queue) {
|
||||
// Recycle the DuckDB connection every CONN_RECYCLE_EVERY writes. Long-lived
|
||||
// in-memory DuckDB connections accumulate buffer pages / catalog state that
|
||||
// doesn't get fully released by DROP TABLE alone; close + reopen forces it.
|
||||
// Set to 10 (vs 50) after leak testing showed +60MB/origin growth at 50 —
|
||||
// tighter recycling keeps the per-connection working set bounded.
|
||||
final int CONN_RECYCLE_EVERY = 10;
|
||||
DuckDBConnection conn;
|
||||
try {
|
||||
conn = Parquet.connect();
|
||||
} catch (Exception e) {
|
||||
System.err.println("Writer failed to open DuckDB connection: " + e);
|
||||
return;
|
||||
}
|
||||
int writesSinceRecycle = 0;
|
||||
try {
|
||||
while (true) {
|
||||
WriteJob job;
|
||||
try {
|
||||
job = queue.take();
|
||||
} catch (InterruptedException ie) {
|
||||
Thread.currentThread().interrupt();
|
||||
return;
|
||||
}
|
||||
if (job == POISON) return;
|
||||
try {
|
||||
if (job.bestTimes() != null) {
|
||||
Parquet.writeTransitTravelTimes(conn, job.outPath(),
|
||||
job.codes(), job.times(), job.bestTimes(), job.journeys());
|
||||
} else {
|
||||
Parquet.writeTravelTimes(conn, job.outPath(),
|
||||
job.codes(), job.times());
|
||||
}
|
||||
job.completed().incrementAndGet();
|
||||
} catch (Exception e) {
|
||||
job.failed().incrementAndGet();
|
||||
System.err.printf("%n [WRITE FAIL] %s: %s%n", job.originName(), e.getMessage());
|
||||
} finally {
|
||||
job.latch().countDown();
|
||||
}
|
||||
if (++writesSinceRecycle >= CONN_RECYCLE_EVERY) {
|
||||
try { conn.close(); } catch (Exception ignore) {}
|
||||
try {
|
||||
conn = Parquet.connect();
|
||||
} catch (Exception e) {
|
||||
System.err.println("Writer failed to reopen DuckDB connection: " + e);
|
||||
return;
|
||||
}
|
||||
writesSinceRecycle = 0;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
try { conn.close(); } catch (Exception ignore) {}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,14 +295,14 @@ public class App {
|
|||
* @param skipCompleted if true, skip origins that already have output files.
|
||||
*/
|
||||
private static void processMode(
|
||||
TransportNetwork network,
|
||||
TransportNetwork network, STRtree postcodeIndex, List<PostcodeTile> transitTiles,
|
||||
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
||||
String[] originNames, double[] originLats, double[] originLons,
|
||||
Path outDir, String mode, LocalDate date,
|
||||
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn,
|
||||
int modeThreads, BlockingQueue<WriteJob> writeQueue,
|
||||
boolean enablePaths, int[] originIndices, boolean skipCompleted) throws Exception {
|
||||
|
||||
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
|
||||
System.err.printf("%n=== %s (workers=%d) ===%n", mode.toUpperCase(), modeThreads);
|
||||
System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode));
|
||||
Path modeDir = outDir.resolve(mode);
|
||||
Files.createDirectories(modeDir);
|
||||
|
|
@ -155,6 +327,22 @@ public class App {
|
|||
return;
|
||||
}
|
||||
|
||||
// Ordering policy:
|
||||
// 1. PRIORITY_PLACES first, in the literal order they're listed (Bank, Tower
|
||||
// Gateway DLR, TCR). These dense London origins are the best LinkageCache
|
||||
// warmers — every later origin in the SE benefits.
|
||||
// 2. Then LPT (longest-processing-time-first): dense urban origins do far
|
||||
// more work than rural ones. Submitting them first prevents a long tail
|
||||
// where a few London origins finish after everything else drains.
|
||||
double modeRadius = Router.maxRadiusKm(mode);
|
||||
remaining.sort(Comparator.<Integer, Integer>comparing(
|
||||
idx -> {
|
||||
int prio = PRIORITY_PLACES.indexOf(originNames[idx]);
|
||||
return prio < 0 ? Integer.MAX_VALUE : prio;
|
||||
})
|
||||
.thenComparing(Comparator.comparingInt((Integer idx) ->
|
||||
Router.estimateWorkload(postcodeIndex, originLats[idx], originLons[idx], modeRadius)).reversed()));
|
||||
|
||||
long startMs = System.currentTimeMillis();
|
||||
int total = remaining.size();
|
||||
AtomicInteger completed = new AtomicInteger(0);
|
||||
|
|
@ -177,27 +365,40 @@ public class App {
|
|||
}, 2, 2, TimeUnit.SECONDS);
|
||||
|
||||
CountDownLatch latch = new CountDownLatch(remaining.size());
|
||||
ExecutorService pool = Executors.newFixedThreadPool(modeThreads, r -> {
|
||||
Thread t = new Thread(r, "r5-" + mode);
|
||||
t.setDaemon(true);
|
||||
return t;
|
||||
});
|
||||
|
||||
for (int idx : remaining) {
|
||||
pool.submit(() -> {
|
||||
try {
|
||||
processOrigin(network, postcodes, postcodeLats, postcodeLons,
|
||||
originLats[idx], originLons[idx],
|
||||
modeDir, mode, date, idx, originNames[idx], threadConn.get(), enablePaths);
|
||||
completed.incrementAndGet();
|
||||
} catch (Exception e) {
|
||||
failed.incrementAndGet();
|
||||
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage());
|
||||
} finally {
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
try {
|
||||
for (int idx : remaining) {
|
||||
final int originIdx = idx;
|
||||
pool.submit(() -> {
|
||||
try {
|
||||
processOrigin(network, postcodeIndex, transitTiles,
|
||||
postcodes, postcodeLats, postcodeLons,
|
||||
originLats[originIdx], originLons[originIdx],
|
||||
modeDir, mode, date, originIdx, originNames[originIdx],
|
||||
writeQueue, enablePaths, completed, failed, latch);
|
||||
} catch (Exception e) {
|
||||
// processOrigin only throws before a WriteJob is enqueued.
|
||||
// The caller owns failure accounting and latch countdown.
|
||||
failed.incrementAndGet();
|
||||
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[originIdx], e.getMessage());
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
latch.await();
|
||||
} finally {
|
||||
pool.shutdown();
|
||||
pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
|
||||
reporter.shutdown();
|
||||
reporter.awaitTermination(5, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
latch.await();
|
||||
reporter.shutdown();
|
||||
reporter.awaitTermination(5, TimeUnit.SECONDS);
|
||||
|
||||
double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
|
||||
int n = completed.get();
|
||||
System.err.printf("\r [%,d/%,d] %.1f/s | %.1fh | fail %d%n",
|
||||
|
|
@ -211,13 +412,17 @@ public class App {
|
|||
}
|
||||
}
|
||||
|
||||
/** Compute and write travel times for a single origin, with retry on failure. */
|
||||
/**
|
||||
* Route from a single origin (with retry), flatten the sparse result, and enqueue
|
||||
* for the writer pool. Once a job is enqueued, the writer counts down the latch when the write finishes.
|
||||
*/
|
||||
private static void processOrigin(
|
||||
TransportNetwork network,
|
||||
TransportNetwork network, STRtree postcodeIndex, List<PostcodeTile> transitTiles,
|
||||
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
||||
double originLat, double originLon,
|
||||
Path modeDir, String mode, LocalDate date, int index, String name,
|
||||
DuckDBConnection conn, boolean enablePaths) throws Exception {
|
||||
BlockingQueue<WriteJob> writeQueue, boolean enablePaths,
|
||||
AtomicInteger completed, AtomicInteger failed, CountDownLatch latch) throws Exception {
|
||||
|
||||
Path outPath = modeDir.resolve(originFilename(index, name));
|
||||
Exception lastError = null;
|
||||
|
|
@ -225,10 +430,10 @@ public class App {
|
|||
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
Router.FilteredResult result = Router.computeForOrigin(
|
||||
network, postcodeLats, postcodeLons,
|
||||
network, postcodeIndex, transitTiles, postcodeLats, postcodeLons,
|
||||
originLat, originLon, mode, date, enablePaths);
|
||||
|
||||
// Write only reachable postcodes (sparse output)
|
||||
// Flatten to only reachable postcodes (sparse output)
|
||||
int reachable = 0;
|
||||
for (short t : result.times()) if (t >= 0) reachable++;
|
||||
|
||||
|
|
@ -247,11 +452,10 @@ public class App {
|
|||
}
|
||||
}
|
||||
|
||||
if (bestTimes != null) {
|
||||
Parquet.writeTransitTravelTimes(conn, outPath, codes, times, bestTimes, journeys);
|
||||
} else {
|
||||
Parquet.writeTravelTimes(conn, outPath, codes, times);
|
||||
}
|
||||
// Hand off to the writer pool. Blocking put() applies backpressure so
|
||||
// routing slows down naturally if disk/compression can't keep up.
|
||||
writeQueue.put(new WriteJob(outPath, codes, times, bestTimes, journeys,
|
||||
name, completed, failed, latch));
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
lastError = e;
|
||||
|
|
@ -264,6 +468,8 @@ public class App {
|
|||
}
|
||||
}
|
||||
}
|
||||
// All retries exhausted; no WriteJob was enqueued, so the caller
|
||||
// owns failure accounting and latch countdown.
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
|
|
@ -303,6 +509,19 @@ public class App {
|
|||
return slugs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cap routing concurrency per mode based on memory footprint per in-flight task.
|
||||
* See the MAX_*_THREADS constant block for the reasoning behind each cap.
|
||||
*/
|
||||
private static int threadsForMode(String mode, int defaultThreads) {
|
||||
if (Router.isTransitMode(mode)) return Math.min(defaultThreads, MAX_TRANSIT_THREADS);
|
||||
return switch (mode) {
|
||||
case "car" -> Math.min(defaultThreads, MAX_CAR_THREADS);
|
||||
case "bicycle" -> Math.min(defaultThreads, MAX_BICYCLE_THREADS);
|
||||
default -> defaultThreads; // walking
|
||||
};
|
||||
}
|
||||
|
||||
private static String requiredArg(String[] args, String name) {
|
||||
for (int i = 0; i < args.length - 1; i++) {
|
||||
if (args[i].equals(name)) return args[i + 1];
|
||||
|
|
|
|||
|
|
@ -119,7 +119,11 @@ public class Parquet {
|
|||
}
|
||||
}
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD, COMPRESSION_LEVEL 1)");
|
||||
// Drop the populated table NOW so DuckDB releases its in-memory storage
|
||||
// for the next write. Without this, the previous origin's rows linger
|
||||
// until the next call's DROP IF EXISTS — accumulating across writers.
|
||||
stmt.execute("DROP TABLE t");
|
||||
}
|
||||
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||
}
|
||||
|
|
@ -149,7 +153,8 @@ public class Parquet {
|
|||
}
|
||||
}
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD, COMPRESSION_LEVEL 1)");
|
||||
stmt.execute("DROP TABLE t");
|
||||
}
|
||||
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||
}
|
||||
|
|
@ -161,6 +166,6 @@ public class Parquet {
|
|||
|
||||
private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
|
||||
stmt.execute("COPY (" + query + ") TO '" + escapePath(outPath.toAbsolutePath().toString())
|
||||
+ "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
+ "' (FORMAT PARQUET, COMPRESSION ZSTD, COMPRESSION_LEVEL 1)");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package propertymap;
|
|||
|
||||
import com.conveyal.r5.OneOriginResult;
|
||||
import com.conveyal.r5.analyst.FreeFormPointSet;
|
||||
import com.conveyal.r5.analyst.LinkageCache;
|
||||
import com.conveyal.r5.analyst.PointSet;
|
||||
import com.conveyal.r5.analyst.StreetTimesAndModes;
|
||||
import com.conveyal.r5.analyst.TravelTimeComputer;
|
||||
|
|
@ -12,11 +13,14 @@ import com.conveyal.r5.analyst.cluster.TravelTimeResult;
|
|||
import com.conveyal.r5.api.util.LegMode;
|
||||
import com.conveyal.r5.api.util.TransitModes;
|
||||
import com.conveyal.r5.kryo.KryoNetworkSerializer;
|
||||
import com.conveyal.r5.profile.StreetMode;
|
||||
import com.conveyal.r5.transit.TransitLayer;
|
||||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import com.conveyal.r5.transit.path.RouteSequence;
|
||||
import com.google.common.collect.Multimap;
|
||||
import org.locationtech.jts.geom.Coordinate;
|
||||
import org.locationtech.jts.geom.Envelope;
|
||||
import org.locationtech.jts.index.strtree.STRtree;
|
||||
|
||||
import java.io.File;
|
||||
import java.time.LocalDate;
|
||||
|
|
@ -31,12 +35,29 @@ public class Router {
|
|||
|
||||
private static final int ZOOM = 9; // R5 enforces range 9-12
|
||||
private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
|
||||
private static final int DEPARTURE_FROM_TIME = 7 * 3600 + 30 * 60; // 07:30
|
||||
private static final int DEPARTURE_TO_TIME = 8 * 3600 + 30 * 60; // 08:30
|
||||
// 30-minute peak window: RAPTOR cost is linear in (toTime-fromTime)/60.
|
||||
// best_minutes (5th percentile) is the best of these 30 minute-shifted departures.
|
||||
private static final int DEPARTURE_FROM_TIME = 7 * 3600 + 45 * 60; // 07:45
|
||||
private static final int DEPARTURE_TO_TIME = 8 * 3600 + 15 * 60; // 08:15
|
||||
private static final int MAX_TRIP_DURATION_MINUTES = 90;
|
||||
// Transit-only: cap walk access/egress at 20 min to shrink the egress
|
||||
// street subgraph PerTargetPropagater walks per stop.
|
||||
private static final int TRANSIT_MAX_WALK_TIME_MIN = 20;
|
||||
|
||||
// Hard R5 limit when path recording is enabled (PathResult internals).
|
||||
// Larger is better here: each chunk forces R5 to rebuild the egress cost
|
||||
// table (~334k stop linkages), so fewer chunks per origin = fewer rebuilds.
|
||||
private static final int PATH_MAX_DESTINATIONS = 5000;
|
||||
|
||||
// Per-chunk destination cap for non-transit direct modes (car/bicycle/walking).
|
||||
// London car origins filter to ~1M postcodes within 150km. Without a cap, each
|
||||
// chunk's per-task LinkedPointSet + FreeFormPointSet allocate ~50-100 MB and
|
||||
// R5's StreetRouter scratch state stacks across concurrent workers, OOMing the
|
||||
// heap. 150k caps per-chunk transient memory at ~5-10 MB; chunk count for
|
||||
// London goes from 1 to ~7, adding ~10-20% wall-clock per origin via repeated
|
||||
// Dijkstra. Walking has so few dests this is a no-op.
|
||||
private static final int DIRECT_MAX_DESTINATIONS = 150_000;
|
||||
|
||||
// Percentile indices in R5 result arrays (order must match task.percentiles in buildTask)
|
||||
private static final int PERCENTILE_BEST = 0; // 5th percentile (transit only)
|
||||
private static final int PERCENTILE_MEDIAN = 1; // 50th percentile (transit: index 1, others: index 0)
|
||||
|
|
@ -44,22 +65,69 @@ public class Router {
|
|||
/** Result of computing travel times for a single origin with spatial pre-filtering. */
|
||||
record FilteredResult(int[] originalIndices, short[] times, short[] bestTimes, String[] journeys) {}
|
||||
|
||||
/**
|
||||
* Global transit tile: a destination subset bundled with the FreeFormPointSet
|
||||
* R5 routes against. Reused across origins so R5's LinkageCache (and the
|
||||
* expensive EgressCostTable) is built once per tile, not once per origin × chunk.
|
||||
*/
|
||||
record PostcodeTile(
|
||||
FreeFormPointSet pointSet,
|
||||
WebMercatorExtents extents,
|
||||
int[] originalIndices,
|
||||
double minLat, double maxLat, double minLon, double maxLon
|
||||
) {}
|
||||
|
||||
/** True for any transit variant (transit, transit-no-bus, transit-no-change, …). */
|
||||
static boolean isTransitMode(String mode) {
|
||||
return mode.startsWith("transit");
|
||||
}
|
||||
|
||||
/** Max plausible travel radius in km for {@link #MAX_TRIP_DURATION_MINUTES}-minute trips. */
|
||||
static double maxRadiusKm(String mode) {
|
||||
if (isTransitMode(mode)) return 150;
|
||||
return switch (mode) {
|
||||
case "car" -> 150;
|
||||
case "transit" -> 150;
|
||||
case "bicycle" -> 60;
|
||||
case "walking" -> 12;
|
||||
default -> throw new IllegalArgumentException("Unknown mode: " + mode);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Transit variant configuration. {@code maxRides} is the number of transit legs:
|
||||
* 1 = walk-transit-walk (no change), 2 = one change, 3 = two changes.
|
||||
* {@code excludeBus} drops {@link TransitModes#BUS} from the allowed mode set.
|
||||
*/
|
||||
private record TransitConfig(int maxRides, boolean excludeBus) {}
|
||||
|
||||
private static TransitConfig transitConfigFor(String mode) {
|
||||
return switch (mode) {
|
||||
case "transit" -> new TransitConfig(3, false);
|
||||
case "transit-no-bus" -> new TransitConfig(3, true);
|
||||
case "transit-no-change" -> new TransitConfig(1, false);
|
||||
case "transit-no-change-no-bus" -> new TransitConfig(1, true);
|
||||
case "transit-one-change" -> new TransitConfig(2, false);
|
||||
case "transit-one-change-no-bus" -> new TransitConfig(2, true);
|
||||
default -> throw new IllegalArgumentException("Unknown transit mode: " + mode);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Load or build the transport network with Kryo caching.
|
||||
* The returned network is read-only after buildDistanceTables — safe for concurrent use.
|
||||
*
|
||||
* The evictable LinkageCache is left small (32 entries) because non-transit modes
|
||||
* create one huge per-origin LinkedPointSet each (~1M dests for car @ 150km radius).
|
||||
* Caching 1024 such entries OOMs the heap. Transit tile linkages instead go into
|
||||
* the unevictable {@code linkageMap} via {@link #preloadTransitTileLinkages} after
|
||||
* tiles are built — that map has no count limit and is checked first on lookup.
|
||||
*/
|
||||
static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
|
||||
// Must be set BEFORE the TransportNetwork is deserialized, since its LinkageCache
|
||||
// is constructed (and sized) during that deserialization. 32 fits the working
|
||||
// set of non-transit per-origin linkages without exhausting heap.
|
||||
LinkageCache.LINKAGE_CACHE_SIZE = 32;
|
||||
|
||||
System.err.println("Loading transport network...");
|
||||
File cacheFile = new File(cacheDir, "network.dat");
|
||||
TransportNetwork network;
|
||||
|
|
@ -111,20 +179,121 @@ public class Router {
|
|||
System.err.printf(" Active transit services on %s: %,d%n", date, activeServices.cardinality());
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate routing workload for an origin: count of postcodes within mode radius.
|
||||
* Cheap STRtree bbox query; used as the LPT sort key for scheduling.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
static int estimateWorkload(STRtree postcodeIndex, double originLat, double originLon, double maxRadiusKm) {
|
||||
double degLat = maxRadiusKm / 111.0;
|
||||
double degLon = maxRadiusKm / (111.0 * Math.cos(Math.toRadians(originLat)));
|
||||
Envelope env = new Envelope(
|
||||
originLon - degLon, originLon + degLon,
|
||||
originLat - degLat, originLat + degLat);
|
||||
return postcodeIndex.query(env).size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an STRtree spatial index over postcode points. Forces an initial query
|
||||
* to trigger lazy build, so the returned tree is safe for concurrent queries.
|
||||
*/
|
||||
static STRtree buildPostcodeIndex(double[] lats, double[] lons) {
|
||||
STRtree tree = new STRtree();
|
||||
for (int i = 0; i < lats.length; i++) {
|
||||
tree.insert(new Envelope(lons[i], lons[i], lats[i], lats[i]), Integer.valueOf(i));
|
||||
}
|
||||
// Force build (otherwise the first concurrent query races on lazy init)
|
||||
tree.query(new Envelope(0, 0, 0, 0));
|
||||
return tree;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build WALK linkages for every transit tile and store them as unevictable on
|
||||
* the network's LinkageCache. Subsequent transit routing calls get cache hits
|
||||
* regardless of how many per-origin car/bike/walk linkages cycle through the
|
||||
* evictable LRU. Logs progress because this is multi-minute work.
|
||||
*/
|
||||
static void preloadTransitTileLinkages(TransportNetwork network, List<PostcodeTile> tiles) {
|
||||
System.err.printf("Pre-building WALK linkages for %,d transit tiles (unevictable)...%n", tiles.size());
|
||||
long t0 = System.currentTimeMillis();
|
||||
int n = tiles.size();
|
||||
for (int i = 0; i < n; i++) {
|
||||
network.linkageCache.buildUnevictableLinkage(
|
||||
tiles.get(i).pointSet(), network.streetLayer, StreetMode.WALK);
|
||||
if ((i + 1) % 25 == 0 || i + 1 == n) {
|
||||
double secs = (System.currentTimeMillis() - t0) / 1000.0;
|
||||
System.err.printf(" %,d/%,d tile linkages built (%.1fs elapsed)%n", i + 1, n, secs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build global transit tiles ONCE from all postcodes. Each tile holds a
|
||||
* FreeFormPointSet that is reused across every transit origin, so R5's
|
||||
* LinkageCache hits and the EgressCostTable is built once per tile rather
|
||||
* than once per (origin × chunk). This is the dominant transit speed-up.
|
||||
*
|
||||
* Tiles are sized to the R5 path-result hard limit (PATH_MAX_DESTINATIONS=5000)
|
||||
* so the same tiles serve both path-recording and non-path transit requests.
|
||||
*/
|
||||
static List<PostcodeTile> buildGlobalTransitTiles(double[] lats, double[] lons) {
|
||||
int n = lats.length;
|
||||
int[] sorted = sortIndicesByLat(lats);
|
||||
|
||||
// Global lon span sets gridWidth — all tiles share the same horizontal extent
|
||||
// bound, so each tile is a horizontal band of postcodes.
|
||||
double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
|
||||
for (double lon : lons) {
|
||||
minLon = Math.min(minLon, lon);
|
||||
maxLon = Math.max(maxLon, lon);
|
||||
}
|
||||
int totalPixels = 256 << ZOOM;
|
||||
int gridWidth = lonToPixel(maxLon, totalPixels) - lonToPixel(minLon, totalPixels) + 1;
|
||||
int maxHeight = MAX_GRID_CELLS / gridWidth;
|
||||
|
||||
List<PostcodeTile> tiles = new ArrayList<>();
|
||||
int start = 0;
|
||||
while (start < n) {
|
||||
int end = start + 1;
|
||||
int topPixel = latToPixel(lats[sorted[start]], totalPixels);
|
||||
|
||||
while (end < n) {
|
||||
if (end - start >= PATH_MAX_DESTINATIONS) break;
|
||||
int bottomPixel = latToPixel(lats[sorted[end]], totalPixels);
|
||||
if (Math.abs(bottomPixel - topPixel) + 1 > maxHeight) break;
|
||||
end++;
|
||||
}
|
||||
|
||||
tiles.add(buildTile(lats, lons, sorted, start, end));
|
||||
start = end;
|
||||
}
|
||||
return tiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter destinations by distance, build chunks, compute travel times for one origin.
|
||||
* Returns only the filtered subset indices and their travel times.
|
||||
*
|
||||
* Transit uses {@code globalTiles} (shared FreeFormPointSets → linkage cache hits);
|
||||
* other modes use per-origin filter+chunk (no expensive linkage to amortize, and
|
||||
* tiling would force routing to many irrelevant destinations).
|
||||
*/
|
||||
static FilteredResult computeForOrigin(
|
||||
TransportNetwork network,
|
||||
STRtree postcodeIndex,
|
||||
List<PostcodeTile> globalTiles,
|
||||
double[] allLats, double[] allLons,
|
||||
double originLat, double originLon,
|
||||
String mode, LocalDate date, boolean enablePaths) {
|
||||
|
||||
if (isTransitMode(mode)) {
|
||||
return computeTransit(network, globalTiles, originLat, originLon, mode, date, enablePaths);
|
||||
}
|
||||
|
||||
double maxRadius = maxRadiusKm(mode);
|
||||
|
||||
// 1. Filter destinations by bounding box
|
||||
int[] filtered = filterByDistance(allLats, allLons, originLat, originLon, maxRadius);
|
||||
// 1. Filter destinations by bounding box (STRtree query)
|
||||
int[] filtered = filterByDistance(postcodeIndex, originLat, originLon, maxRadius);
|
||||
if (filtered.length == 0) {
|
||||
return new FilteredResult(new int[0], new short[0], null, null);
|
||||
}
|
||||
|
|
@ -137,56 +306,109 @@ public class Router {
|
|||
fLons[i] = allLons[filtered[i]];
|
||||
}
|
||||
|
||||
// 3. Build chunks — smaller when path recording is active (R5 PathResult limit: 5000)
|
||||
boolean isTransit = mode.equals("transit");
|
||||
boolean recordPaths = isTransit && enablePaths;
|
||||
int maxDestsPerChunk = recordPaths ? PATH_MAX_DESTINATIONS : Integer.MAX_VALUE;
|
||||
List<DestinationChunk> chunks = buildDestinationChunks(fLats, fLons, maxDestsPerChunk);
|
||||
// 3. Build per-origin chunks. Cap at DIRECT_MAX_DESTINATIONS so car at 150km
|
||||
// radius (~1M dests for London) gets split into ~7 manageable chunks
|
||||
// instead of one giant LinkedPointSet allocation.
|
||||
List<DestinationChunk> chunks = buildDestinationChunks(fLats, fLons, DIRECT_MAX_DESTINATIONS);
|
||||
|
||||
// 4. Compute travel times (and optionally paths)
|
||||
String[] journeys = recordPaths ? new String[fLats.length] : null;
|
||||
short[][] allTimes = computeTravelTimes(
|
||||
network, chunks, originLat, originLon, mode, fLats.length, date,
|
||||
recordPaths, journeys);
|
||||
// 4. Compute travel times
|
||||
short[][] allTimes = computeTravelTimesDirect(network, chunks, originLat, originLon, mode, fLats.length, date);
|
||||
return new FilteredResult(filtered, allTimes[0], null, null);
|
||||
}
|
||||
|
||||
// Transit requests [5th, 50th] percentiles; others request [50th] only
|
||||
short[] medianTimes = isTransit ? allTimes[PERCENTILE_MEDIAN] : allTimes[0];
|
||||
short[] bestTimes = isTransit ? allTimes[PERCENTILE_BEST] : null;
|
||||
return new FilteredResult(filtered, medianTimes, bestTimes, journeys);
|
||||
/**
|
||||
* Transit routing path: route from origin to every global tile whose bbox intersects
|
||||
* the origin's max-radius bbox. Reuses tile FreeFormPointSets for R5 LinkageCache hits.
|
||||
* {@code mode} selects the transit variant (rides cap, bus exclusion).
|
||||
*/
|
||||
private static FilteredResult computeTransit(
|
||||
TransportNetwork network, List<PostcodeTile> globalTiles,
|
||||
double originLat, double originLon, String mode, LocalDate date, boolean enablePaths) {
|
||||
|
||||
double maxRadius = maxRadiusKm(mode);
|
||||
double degLat = maxRadius / 111.0;
|
||||
double degLon = maxRadius / (111.0 * Math.cos(Math.toRadians(originLat)));
|
||||
double oMinLat = originLat - degLat, oMaxLat = originLat + degLat;
|
||||
double oMinLon = originLon - degLon, oMaxLon = originLon + degLon;
|
||||
|
||||
List<PostcodeTile> selected = new ArrayList<>();
|
||||
int totalDests = 0;
|
||||
for (PostcodeTile tile : globalTiles) {
|
||||
if (tile.maxLat() < oMinLat || tile.minLat() > oMaxLat) continue;
|
||||
if (tile.maxLon() < oMinLon || tile.minLon() > oMaxLon) continue;
|
||||
selected.add(tile);
|
||||
totalDests += tile.originalIndices().length;
|
||||
}
|
||||
|
||||
if (selected.isEmpty()) {
|
||||
return new FilteredResult(new int[0], new short[0], new short[0],
|
||||
enablePaths ? new String[0] : null);
|
||||
}
|
||||
|
||||
int[] outIndices = new int[totalDests];
|
||||
short[] medianTimes = new short[totalDests];
|
||||
short[] bestTimes = new short[totalDests];
|
||||
Arrays.fill(medianTimes, (short) -1);
|
||||
Arrays.fill(bestTimes, (short) -1);
|
||||
String[] journeys = enablePaths ? new String[totalDests] : null;
|
||||
|
||||
int offset = 0;
|
||||
for (PostcodeTile tile : selected) {
|
||||
int tileLen = tile.originalIndices().length;
|
||||
System.arraycopy(tile.originalIndices(), 0, outIndices, offset, tileLen);
|
||||
|
||||
RegionalTask task = buildTaskForTile(tile, originLat, originLon, mode, date, enablePaths);
|
||||
TravelTimeComputer computer = new TravelTimeComputer(task, network);
|
||||
OneOriginResult result = computer.computeTravelTimes();
|
||||
|
||||
TravelTimeResult tt = result.travelTimes;
|
||||
if (tt == null) {
|
||||
throw new RuntimeException("R5 returned null travelTimes for tile with " + tileLen + " destinations");
|
||||
}
|
||||
int[][] values = tt.getValues();
|
||||
if (values.length < 2) {
|
||||
throw new RuntimeException("R5 returned " + values.length + " percentiles, expected 2");
|
||||
}
|
||||
for (int i = 0; i < tileLen; i++) {
|
||||
if (values[PERCENTILE_BEST][i] != Integer.MAX_VALUE) {
|
||||
bestTimes[offset + i] = (short) values[PERCENTILE_BEST][i];
|
||||
}
|
||||
if (values[PERCENTILE_MEDIAN][i] != Integer.MAX_VALUE) {
|
||||
medianTimes[offset + i] = (short) values[PERCENTILE_MEDIAN][i];
|
||||
}
|
||||
}
|
||||
|
||||
if (enablePaths && result.paths != null) {
|
||||
extractPathsIntoOffset(result.paths, tileLen, offset, network.transitLayer, journeys);
|
||||
}
|
||||
|
||||
offset += tileLen;
|
||||
}
|
||||
|
||||
return new FilteredResult(outIndices, medianTimes, bestTimes, journeys);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter destination indices to those within a bounding box of maxRadiusKm from origin.
|
||||
* Uses degree-based approximation — slightly overestimates at corners, which is fine.
|
||||
* Backed by STRtree: O(log n + k) per query instead of O(n) scan.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private static int[] filterByDistance(
|
||||
double[] lats, double[] lons,
|
||||
STRtree postcodeIndex,
|
||||
double originLat, double originLon,
|
||||
double maxRadiusKm) {
|
||||
|
||||
double degLat = maxRadiusKm / 111.0;
|
||||
double degLon = maxRadiusKm / (111.0 * Math.cos(Math.toRadians(originLat)));
|
||||
|
||||
double minLat = originLat - degLat;
|
||||
double maxLat = originLat + degLat;
|
||||
double minLon = originLon - degLon;
|
||||
double maxLon = originLon + degLon;
|
||||
Envelope queryEnv = new Envelope(
|
||||
originLon - degLon, originLon + degLon,
|
||||
originLat - degLat, originLat + degLat);
|
||||
|
||||
// Two-pass: count then fill (avoids ArrayList/boxing overhead)
|
||||
int count = 0;
|
||||
for (int i = 0; i < lats.length; i++) {
|
||||
if (lats[i] >= minLat && lats[i] <= maxLat && lons[i] >= minLon && lons[i] <= maxLon) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
int[] result = new int[count];
|
||||
int j = 0;
|
||||
for (int i = 0; i < lats.length; i++) {
|
||||
if (lats[i] >= minLat && lats[i] <= maxLat && lons[i] >= minLon && lons[i] <= maxLon) {
|
||||
result[j++] = i;
|
||||
}
|
||||
}
|
||||
List<Integer> hits = postcodeIndex.query(queryEnv);
|
||||
int[] result = new int[hits.size()];
|
||||
for (int i = 0; i < hits.size(); i++) result[i] = hits.get(i);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -199,10 +421,10 @@ public class Router {
|
|||
double[] lats, double[] lons, int maxDestsPerChunk) {
|
||||
int n = lats.length;
|
||||
|
||||
// Sort indices by latitude for geographic chunking
|
||||
Integer[] sorted = new Integer[n];
|
||||
for (int i = 0; i < n; i++) sorted[i] = i;
|
||||
Arrays.sort(sorted, (a, b) -> Double.compare(lats[a], lats[b]));
|
||||
// Sort indices by latitude for geographic chunking — primitive long sort to
|
||||
// avoid Integer[] autoboxing per origin (millions of Integer allocs at scale).
|
||||
// Pack: high 32 bits = lat as sortable int, low 32 bits = original index.
|
||||
int[] sorted = sortIndicesByLat(lats);
|
||||
|
||||
// Determine grid width (longitude span is the same for all chunks)
|
||||
double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
|
||||
|
|
@ -236,22 +458,18 @@ public class Router {
|
|||
}
|
||||
|
||||
/**
|
||||
* Compute travel times from one origin to all destinations across all chunks.
|
||||
* Returns one short[] per requested percentile (transit gets 2: best + median, others get 1: median).
|
||||
* When recordPaths is true, also extracts journey instructions into journeysOut.
|
||||
* Compute travel times for a non-transit mode (single percentile, no paths).
|
||||
* Result is indexed into the per-origin filtered subset (via chunk.originalIndices).
|
||||
*/
|
||||
private static short[][] computeTravelTimes(
|
||||
private static short[][] computeTravelTimesDirect(
|
||||
TransportNetwork network, List<DestinationChunk> chunks,
|
||||
double originLat, double originLon, String mode, int nDest, LocalDate date,
|
||||
boolean recordPaths, String[] journeysOut) {
|
||||
double originLat, double originLon, String mode, int nDest, LocalDate date) {
|
||||
|
||||
boolean isTransit = mode.equals("transit");
|
||||
int nPercentiles = isTransit ? 2 : 1;
|
||||
short[][] allTimes = new short[nPercentiles][nDest];
|
||||
for (short[] arr : allTimes) Arrays.fill(arr, (short) -1);
|
||||
short[][] allTimes = new short[1][nDest];
|
||||
Arrays.fill(allTimes[0], (short) -1);
|
||||
|
||||
for (DestinationChunk chunk : chunks) {
|
||||
RegionalTask task = buildTask(chunk, originLat, originLon, mode, date, recordPaths);
|
||||
RegionalTask task = buildTask(chunk, originLat, originLon, mode, date, false);
|
||||
TravelTimeComputer computer = new TravelTimeComputer(task, network);
|
||||
OneOriginResult result = computer.computeTravelTimes();
|
||||
|
||||
|
|
@ -261,41 +479,29 @@ public class Router {
|
|||
+ chunk.originalIndices.length + " destinations");
|
||||
}
|
||||
int[][] values = tt.getValues();
|
||||
if (values.length < nPercentiles) {
|
||||
throw new RuntimeException("R5 returned " + values.length + " percentiles, expected "
|
||||
+ nPercentiles);
|
||||
if (values.length < 1) {
|
||||
throw new RuntimeException("R5 returned 0 percentiles, expected 1");
|
||||
}
|
||||
for (int p = 0; p < nPercentiles; p++) {
|
||||
if (values[p].length < chunk.originalIndices.length) {
|
||||
throw new RuntimeException("R5 returned " + values[p].length
|
||||
+ " travel times for percentile " + p + ", expected "
|
||||
+ chunk.originalIndices.length);
|
||||
for (int i = 0; i < chunk.originalIndices.length; i++) {
|
||||
if (values[0][i] != Integer.MAX_VALUE) {
|
||||
allTimes[0][chunk.originalIndices[i]] = (short) values[0][i];
|
||||
}
|
||||
for (int i = 0; i < chunk.originalIndices.length; i++) {
|
||||
if (values[p][i] != Integer.MAX_VALUE) {
|
||||
allTimes[p][chunk.originalIndices[i]] = (short) values[p][i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract path data for transit
|
||||
if (recordPaths && journeysOut != null && result.paths != null) {
|
||||
extractPaths(result.paths, chunk.originalIndices, network.transitLayer, journeysOut);
|
||||
}
|
||||
}
|
||||
return allTimes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the most common journey pattern for each destination in a chunk.
|
||||
* Produces a JSON array of legs: [{mode, from?, to?, minutes}, ...].
|
||||
* Extract the most common journey pattern for each destination in a tile,
|
||||
* writing into a combined output array at the given offset.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private static void extractPaths(
|
||||
PathResult paths, int[] originalIndices, TransitLayer transitLayer,
|
||||
private static void extractPathsIntoOffset(
|
||||
PathResult paths, int tileLen, int offset, TransitLayer transitLayer,
|
||||
String[] journeysOut) {
|
||||
Multimap<RouteSequence, PathResult.Iteration>[] allPaths = paths.iterationsForPathTemplates;
|
||||
for (int i = 0; i < originalIndices.length && i < allPaths.length; i++) {
|
||||
int n = Math.min(tileLen, allPaths.length);
|
||||
for (int i = 0; i < n; i++) {
|
||||
Multimap<RouteSequence, PathResult.Iteration> destPaths = allPaths[i];
|
||||
if (destPaths == null || destPaths.isEmpty()) continue;
|
||||
|
||||
|
|
@ -311,7 +517,7 @@ public class Router {
|
|||
}
|
||||
if (bestRoute == null) continue;
|
||||
|
||||
journeysOut[originalIndices[i]] = buildJourneyJson(bestRoute, transitLayer);
|
||||
journeysOut[offset + i] = buildJourneyJson(bestRoute, transitLayer);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -362,8 +568,29 @@ public class Router {
|
|||
|
||||
private record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
|
||||
|
||||
/**
|
||||
* Sort destination indices by latitude using a primitive long-packed sort.
|
||||
* Encodes lat as a fixed-point microdeg int (+offset to keep it non-negative
|
||||
* for any plausible lat) so high 32 bits of the packed long give a monotonic
|
||||
* sort key. Low 32 bits hold the original index, breaking ties deterministically.
|
||||
*/
|
||||
private static int[] sortIndicesByLat(double[] lats) {
|
||||
int n = lats.length;
|
||||
long[] packed = new long[n];
|
||||
// Offset by 90° so any lat in [-90, 90] maps to a non-negative key
|
||||
long offset = 900_000_000L;
|
||||
for (int i = 0; i < n; i++) {
|
||||
long latKey = (long) Math.round(lats[i] * 10_000_000L) + offset;
|
||||
packed[i] = (latKey << 32) | (i & 0xFFFFFFFFL);
|
||||
}
|
||||
Arrays.sort(packed);
|
||||
int[] sorted = new int[n];
|
||||
for (int i = 0; i < n; i++) sorted[i] = (int) (packed[i] & 0xFFFFFFFFL);
|
||||
return sorted;
|
||||
}
|
||||
|
||||
private static DestinationChunk buildChunk(
|
||||
double[] lats, double[] lons, Integer[] sorted, int start, int end) {
|
||||
double[] lats, double[] lons, int[] sorted, int start, int end) {
|
||||
int size = end - start;
|
||||
int[] originalIndices = new int[size];
|
||||
Coordinate[] coords = new Coordinate[size];
|
||||
|
|
@ -392,6 +619,69 @@ public class Router {
|
|||
return new DestinationChunk(pointSet, extents, originalIndices);
|
||||
}
|
||||
|
||||
/** Like {@link #buildChunk} but produces a {@link PostcodeTile} with bbox + global indices. */
|
||||
private static PostcodeTile buildTile(
|
||||
double[] lats, double[] lons, int[] sorted, int start, int end) {
|
||||
int size = end - start;
|
||||
int[] originalIndices = new int[size];
|
||||
Coordinate[] coords = new Coordinate[size];
|
||||
double minLat = Double.MAX_VALUE, maxLat = -Double.MAX_VALUE;
|
||||
double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
int idx = sorted[start + i];
|
||||
originalIndices[i] = idx;
|
||||
double lat = lats[idx], lon = lons[idx];
|
||||
coords[i] = new Coordinate(lon, lat);
|
||||
minLat = Math.min(minLat, lat);
|
||||
maxLat = Math.max(maxLat, lat);
|
||||
minLon = Math.min(minLon, lon);
|
||||
maxLon = Math.max(maxLon, lon);
|
||||
}
|
||||
|
||||
FreeFormPointSet pointSet = new FreeFormPointSet(coords);
|
||||
int totalPixels = 256 << ZOOM;
|
||||
int west = lonToPixel(minLon, totalPixels);
|
||||
int north = latToPixel(maxLat, totalPixels);
|
||||
int width = lonToPixel(maxLon, totalPixels) - west + 1;
|
||||
int height = latToPixel(minLat, totalPixels) - north + 1;
|
||||
WebMercatorExtents extents = new WebMercatorExtents(west, north, width, height, ZOOM);
|
||||
|
||||
return new PostcodeTile(pointSet, extents, originalIndices, minLat, maxLat, minLon, maxLon);
|
||||
}
|
||||
|
||||
/** Build a transit RegionalTask that targets one global tile, configured by {@code mode}. */
|
||||
private static RegionalTask buildTaskForTile(
|
||||
PostcodeTile tile, double originLat, double originLon, String mode, LocalDate date, boolean recordPaths) {
|
||||
RegionalTask task = new RegionalTask();
|
||||
task.fromLat = originLat;
|
||||
task.fromLon = originLon;
|
||||
task.date = date;
|
||||
task.percentiles = new int[]{5, 50};
|
||||
task.recordTimes = true;
|
||||
task.destinationPointSets = new PointSet[]{tile.pointSet()};
|
||||
task.zoom = tile.extents().zoom;
|
||||
task.west = tile.extents().west;
|
||||
task.north = tile.extents().north;
|
||||
task.width = tile.extents().width;
|
||||
task.height = tile.extents().height;
|
||||
task.fromTime = DEPARTURE_FROM_TIME;
|
||||
task.toTime = DEPARTURE_TO_TIME;
|
||||
task.maxTripDurationMinutes = MAX_TRIP_DURATION_MINUTES;
|
||||
// TfL GTFS uses frequency-based service patterns. With the default
|
||||
// monteCarloDraws=220 R5 runs 8 iters/min (~240 iters per 30-min window).
|
||||
// Set to 0 to use HALF_HEADWAY mode → 1 iter/min, deterministic, 8x cheaper.
|
||||
task.monteCarloDraws = 0;
|
||||
|
||||
if (recordPaths) {
|
||||
task.includePathResults = true;
|
||||
task.nPathsPerTarget = 1;
|
||||
}
|
||||
|
||||
configureMode(task, mode);
|
||||
return task;
|
||||
}
|
||||
|
||||
private static RegionalTask buildTask(
|
||||
DestinationChunk chunk, double originLat, double originLon, String mode, LocalDate date,
|
||||
boolean recordPaths) {
|
||||
|
|
@ -423,20 +713,25 @@ public class Router {
|
|||
}
|
||||
|
||||
private static void configureMode(RegionalTask task, String mode) {
|
||||
if (isTransitMode(mode)) {
|
||||
TransitConfig config = transitConfigFor(mode);
|
||||
task.maxRides = config.maxRides();
|
||||
task.maxWalkTime = TRANSIT_MAX_WALK_TIME_MIN;
|
||||
// R5 requires directModes ⊆ accessModes. BICYCLE egress is too expensive
|
||||
// (builds cost tables from 59k stops × N destinations), so keep WALK only
|
||||
// for egress and match access/direct to avoid the R5 validation error.
|
||||
task.accessModes = EnumSet.of(LegMode.WALK);
|
||||
task.egressModes = EnumSet.of(LegMode.WALK);
|
||||
task.directModes = EnumSet.of(LegMode.WALK);
|
||||
EnumSet<TransitModes> transitModes = EnumSet.allOf(TransitModes.class);
|
||||
if (config.excludeBus()) transitModes.remove(TransitModes.BUS);
|
||||
task.transitModes = transitModes;
|
||||
return;
|
||||
}
|
||||
switch (mode) {
|
||||
case "car" -> setDirectMode(task, LegMode.CAR);
|
||||
case "bicycle" -> setDirectMode(task, LegMode.BICYCLE);
|
||||
case "walking" -> setDirectMode(task, LegMode.WALK);
|
||||
case "transit" -> {
|
||||
task.maxRides = 4;
|
||||
// R5 requires directModes ⊆ accessModes. BICYCLE egress is too expensive
|
||||
// (builds cost tables from 59k stops × N destinations), so keep WALK only
|
||||
// for egress and match access/direct to avoid the R5 validation error.
|
||||
task.accessModes = EnumSet.of(LegMode.WALK);
|
||||
task.egressModes = EnumSet.of(LegMode.WALK);
|
||||
task.directModes = EnumSet.of(LegMode.WALK);
|
||||
task.transitModes = EnumSet.allOf(TransitModes.class);
|
||||
}
|
||||
default -> throw new IllegalArgumentException("Unknown mode: " + mode);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue