Lots of improvements

This commit is contained in:
Andras Schmelczer 2026-02-22 21:09:07 +00:00
parent 205302dbb8
commit eb02b5832b
39 changed files with 699 additions and 271 deletions

View file

@ -4,6 +4,9 @@ set -euo pipefail
# Batch-compute travel times from all places to all England postcodes
# for all transport modes (car, bicycle, walking, transit).
#
# Uses full England OSM + 2 GTFS feeds (BODS buses, National Rail).
# R5's TransportNetwork.fromDirectory() picks up all .osm.pbf and .zip files.
#
# Uses each place as origin with all postcodes as destinations — R5 does one
# routing computation per place, then reads off travel times to all postcodes.
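# For car/bicycle/walking this is treated as symmetric (place->postcode = postcode->place);
# one-way restrictions can make the two directions differ slightly, so this is an approximation.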
@ -15,11 +18,10 @@ set -euo pipefail
#
# Usage:
# ./r5-java/run.sh
# ./r5-java/run.sh --threads 8 --heap 24g --output-dir property-data/travel-times
# --- Defaults ---
THREADS=16
HEAP=16g
THREADS=4
HEAP=12g
NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java
@ -102,25 +104,26 @@ fi
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
mkdir -p "$NETWORK_DIR"
DATA_DIR="property-data/transit"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$TRANSIT_SRC"
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
BUILD_DIR="$NETWORK_DIR/build"
echo "--- No cached network — copying transit data to build dir ---"
mkdir -p "$BUILD_DIR"
if ! cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .osm.pbf files found in property-data/transit/raw/"
if ! cp "$TRANSIT_SRC"/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .osm.pbf files found in $TRANSIT_SRC/raw/"
fi
if ! cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .zip files found in property-data/transit/"
if ! cp "$TRANSIT_SRC"/*.zip "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .zip files found in $TRANSIT_SRC/"
fi
DATA_DIR="$BUILD_DIR"
NETWORK_DATA_DIR="$BUILD_DIR"
fi
# --- Step 5: Run batch ---
echo ""
echo "--- Starting batch computation ---"
DATA_DIR="$DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
--postcodes property-data/arcgis_data.parquet \
--places property-data/places.parquet \

View file

@ -192,6 +192,9 @@ public class App {
if (attempt < MAX_RETRIES) {
System.err.printf("%n [RETRY %d/%d] %s: %s%n",
attempt + 1, MAX_RETRIES, name, e.getMessage());
} else {
System.err.printf("%n [FAIL TRACE] %s:%n", name);
e.printStackTrace(System.err);
}
}
}
@ -215,7 +218,7 @@ public class App {
String safe = name.toLowerCase()
.replaceAll("[^a-z0-9 -]", "")
.replaceAll("\\s+", "-");
return String.format("%04d-%s.parquet", index, safe);
return String.format("%06d-%s.parquet", index, safe);
}
private static String requiredArg(String[] args, String name) {

View file

@ -29,6 +29,10 @@ public class Router {
private static final int DEPARTURE_TO_TIME = 9 * 3600; // 09:00, expressed as seconds after midnight (9 h * 3600 s/h)
// Trip-duration limit in minutes. NOTE(review): where this is applied is outside this view — confirm against buildTask.
private static final int MAX_TRIP_DURATION_MINUTES = 120;
// Percentile indices in R5 result arrays (order must match task.percentiles in buildTask).
// Both constants describe the transit layout, where [5th, 50th] percentiles are requested.
// Non-transit modes request [50th] only, so their median sits at index 0 and these
// constants must not be used to index non-transit results.
private static final int PERCENTILE_BEST = 0; // 5th percentile (transit only)
private static final int PERCENTILE_MEDIAN = 1; // 50th percentile for transit requests; non-transit median is at index 0
/** Result of computing travel times for a single origin with spatial pre-filtering. */
record FilteredResult(int[] originalIndices, short[] times, short[] bestTimes) {}
@ -102,10 +106,9 @@ public class Router {
boolean isTransit = mode.equals("transit");
short[][] allTimes = computeTravelTimes(network, chunks, originLat, originLon, mode, fLats.length, date);
// For transit: allTimes[0]=best (5th percentile), allTimes[1]=median (50th)
// For others: allTimes[0]=median (50th), no best
short[] medianTimes = isTransit ? allTimes[1] : allTimes[0];
short[] bestTimes = isTransit ? allTimes[0] : null;
// Transit requests [5th, 50th] percentiles; others request [50th] only
short[] medianTimes = isTransit ? allTimes[PERCENTILE_MEDIAN] : allTimes[0];
short[] bestTimes = isTransit ? allTimes[PERCENTILE_BEST] : null;
return new FilteredResult(filtered, medianTimes, bestTimes);
}
@ -205,13 +208,24 @@ public class Router {
OneOriginResult result = computer.computeTravelTimes();
TravelTimeResult tt = result.travelTimes;
if (tt != null) {
int[][] values = tt.getValues();
for (int p = 0; p < nPercentiles && p < values.length; p++) {
for (int i = 0; i < chunk.originalIndices.length && i < values[p].length; i++) {
if (values[p][i] != Integer.MAX_VALUE) {
allTimes[p][chunk.originalIndices[i]] = (short) values[p][i];
}
if (tt == null) {
throw new RuntimeException("R5 returned null travelTimes for chunk with "
+ chunk.originalIndices.length + " destinations");
}
int[][] values = tt.getValues();
if (values.length < nPercentiles) {
throw new RuntimeException("R5 returned " + values.length + " percentiles, expected "
+ nPercentiles);
}
for (int p = 0; p < nPercentiles; p++) {
if (values[p].length < chunk.originalIndices.length) {
throw new RuntimeException("R5 returned " + values[p].length
+ " travel times for percentile " + p + ", expected "
+ chunk.originalIndices.length);
}
for (int i = 0; i < chunk.originalIndices.length; i++) {
if (values[p][i] != Integer.MAX_VALUE) {
allTimes[p][chunk.originalIndices[i]] = (short) values[p][i];
}
}
}