This commit is contained in:
Andras Schmelczer 2026-05-26 19:45:13 +01:00
parent c645b0f1d4
commit 39ef5c6646
79 changed files with 5660 additions and 2199 deletions

View file

@ -17,8 +17,10 @@ set -euo pipefail
# - places_ref.parquet: place order reference
#
# Usage:
# ./r5-java/run.sh [--demo]
# --demo only compute Bank + TCR, transit only (quick test)
# ./r5-java/run.sh [--demo] [--cache-warmers]
# --demo only compute Bank + TCR, transit only (quick test)
# --cache-warmers only compute Bank + Tower Gateway DLR, all modes (LinkageCache warmup);
# skips origins whose output parquet already exists
# --- Defaults ---
THREADS=12
@ -32,16 +34,18 @@ NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java
DEMO_FLAG=""
CACHE_WARMERS_FLAG=""
# --- Parse args ---
while [[ $# -gt 0 ]]; do
case $1 in
--threads) THREADS="$2"; shift 2 ;;
--heap) HEAP="$2"; shift 2 ;;
--network-dir) NETWORK_DIR="$2"; shift 2 ;;
--output-dir) OUTPUT_BASE="$2"; shift 2 ;;
--demo) DEMO_FLAG="--demo"; shift ;;
--demo-cars=*) DEMO_FLAG="--demo-cars ${1#--demo-cars=}"; shift ;;
--threads) THREADS="$2"; shift 2 ;;
--heap) HEAP="$2"; shift 2 ;;
--network-dir) NETWORK_DIR="$2"; shift 2 ;;
--output-dir) OUTPUT_BASE="$2"; shift 2 ;;
--demo) DEMO_FLAG="--demo"; shift ;;
--demo-cars=*) DEMO_FLAG="--demo-cars ${1#--demo-cars=}"; shift ;;
--cache-warmers) CACHE_WARMERS_FLAG="--cache-warmers"; shift ;;
*) echo "Unknown: $1"; exit 1 ;;
esac
done
@ -165,7 +169,7 @@ java -Xmx"$HEAP" \
--places property-data/places.parquet \
--output-dir "$OUTPUT_BASE" \
--threads "$THREADS" \
$DEMO_FLAG
$DEMO_FLAG $CACHE_WARMERS_FLAG
echo ""
echo "=== Complete ==="

View file

@ -63,15 +63,12 @@ public class App {
private static final Set<String> DEMO_PLACES = Set.of(
"Bank tube station", "Tottenham Court Road tube station");
/**
* Always-first origins (per-mode). The cache-warmest London core: these origins
* each touch ~100 unique global tiles, so running them up front builds the
* LinkageCache that every subsequent London-ish origin reuses.
* Order within this list is preserved in submission order.
* Origins computed when the {@code --cache-warmers} flag is passed. The cache-warmest
* London core: these origins each touch ~100 unique global tiles, so running
* them populates the LinkageCache that every later London-ish origin reuses.
*/
private static final List<String> PRIORITY_PLACES = List.of(
"Bank tube station",
"Tower Gateway DLR station",
"Tottenham Court Road tube station");
private static final Set<String> CACHE_WARMER_PLACES = Set.of(
"Bank tube station", "Tower Gateway DLR station");
private static final int MAX_RETRIES = 2;
/** Writer pool size. Holds one DuckDB connection per thread. */
@ -122,6 +119,7 @@ public class App {
int threads = Integer.parseInt(optionalArg(args, "--threads", "4"));
boolean enablePaths = true;
boolean demo = hasFlag(args, "--demo");
boolean cacheWarmersOnly = hasFlag(args, "--cache-warmers");
Path outDir = Paths.get(outputDirStr);
Files.createDirectories(outDir);
@ -190,6 +188,15 @@ public class App {
modes = DEMO_MODES;
System.err.printf("DEMO MODE: %d places (transit only)%n", originIndices.length);
for (int i : originIndices) System.err.printf(" - %s%n", originNames[i]);
} else if (cacheWarmersOnly) {
List<Integer> warmerIdx = new ArrayList<>();
for (int i = 0; i < nOrigins; i++) {
if (CACHE_WARMER_PLACES.contains(originNames[i])) warmerIdx.add(i);
}
originIndices = warmerIdx.stream().mapToInt(Integer::intValue).toArray();
modes = MODES;
System.err.printf("CACHE-WARMERS MODE: %d places (all modes)%n", originIndices.length);
for (int i : originIndices) System.err.printf(" - %s%n", originNames[i]);
} else {
// Normal mode: use all travel-eligible England places
originIndices = englandIndices.stream().sorted()
@ -327,21 +334,12 @@ public class App {
return;
}
// Ordering policy:
// 1. PRIORITY_PLACES first, in the literal order they're listed (Bank, Tower
// Gateway DLR, TCR). These dense London origins are the best LinkageCache
// warmers — every later origin in the SE benefits.
// 2. Then LPT (longest-processing-time-first): dense urban origins do far
// more work than rural ones. Submitting them first prevents a long tail
// where a few London origins finish after everything else drains.
// Ordering policy: LPT (longest-processing-time-first). Dense urban origins
// do far more work than rural ones; submitting them first prevents a long
// tail where a few London origins finish after everything else drains.
double modeRadius = Router.maxRadiusKm(mode);
remaining.sort(Comparator.<Integer, Integer>comparing(
idx -> {
int prio = PRIORITY_PLACES.indexOf(originNames[idx]);
return prio < 0 ? Integer.MAX_VALUE : prio;
})
.thenComparing(Comparator.comparingInt((Integer idx) ->
Router.estimateWorkload(postcodeIndex, originLats[idx], originLons[idx], modeRadius)).reversed()));
remaining.sort(Comparator.comparingInt((Integer idx) ->
Router.estimateWorkload(postcodeIndex, originLats[idx], originLons[idx], modeRadius)).reversed());
long startMs = System.currentTimeMillis();
int total = remaining.size();
@ -527,7 +525,7 @@ public class App {
if (args[i].equals(name)) return args[i + 1];
}
System.err.println("Missing required argument: " + name);
System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N] [--demo]");
System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N] [--demo] [--cache-warmers]");
System.exit(1);
return null; // unreachable
}