lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
|
|
@ -14,13 +14,12 @@ set -euo pipefail
|
|||
# - places_ref.parquet: place order reference
|
||||
#
|
||||
# Usage:
|
||||
# ./r5-java/run.sh # 4 threads, 16g heap
|
||||
# ./r5-java/run.sh --threads 8
|
||||
# ./r5-java/run.sh --heap 24g
|
||||
# ./r5-java/run.sh
|
||||
# ./r5-java/run.sh --threads 8 --heap 24g
|
||||
|
||||
# --- Defaults ---
|
||||
THREADS=28
|
||||
HEAP=40g
|
||||
THREADS=16
|
||||
HEAP=16g
|
||||
NETWORK_DIR=property-data/r5-network
|
||||
OUTPUT_BASE=property-data/travel-times
|
||||
R5_DIR=r5-java
|
||||
|
|
@ -125,5 +124,5 @@ java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
|||
|
||||
echo ""
|
||||
echo "=== Complete ==="
|
||||
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/"
|
||||
echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/{place-name}.parquet"
|
||||
echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"
|
||||
|
|
|
|||
|
|
@ -16,15 +16,19 @@ import java.util.concurrent.TimeUnit;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Batch-compute travel times from each origin (place) to all destinations (postcodes)
|
||||
* Batch-compute travel times from each origin (place) to nearby postcodes
|
||||
* for all transport modes (car, bicycle, walking, transit).
|
||||
*
|
||||
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
|
||||
* with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
|
||||
* Each origin is spatially pre-filtered to only route to postcodes within a
|
||||
* plausible travel radius for the mode. Output is sparse: only reachable
|
||||
* postcodes are written (unreachable = absent from file).
|
||||
*
|
||||
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet
|
||||
* with columns (pcds VARCHAR, travel_minutes SMALLINT).
|
||||
*/
|
||||
public class App {
|
||||
|
||||
private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
|
||||
private static final String[] MODES = {"bicycle", "walking", "transit", "car"};
|
||||
private static final int MAX_RETRIES = 2;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
|
@ -42,18 +46,14 @@ public class App {
|
|||
System.err.println("Loading postcodes (England only)...");
|
||||
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
|
||||
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
|
||||
int nDest = postcodes.lats().length;
|
||||
System.err.printf(" %,d postcodes%n", nDest);
|
||||
|
||||
List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());
|
||||
System.err.printf(" %,d postcodes%n", postcodes.lats().length);
|
||||
|
||||
System.err.println("Loading places (deduplicated)...");
|
||||
double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
|
||||
double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
|
||||
Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
|
||||
String[] originNames = places.names();
|
||||
double[] originLats = places.lats(), originLons = places.lons();
|
||||
int nOrigins = originLats.length;
|
||||
System.err.printf(" %,d places%n", nOrigins);
|
||||
System.err.printf(" Estimated output: %.1f GB (%,d x %,d x 2B)%n",
|
||||
(double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);
|
||||
|
||||
// One thread pool shared across all modes
|
||||
ExecutorService pool = Executors.newFixedThreadPool(threads);
|
||||
|
|
@ -65,8 +65,8 @@ public class App {
|
|||
|
||||
try {
|
||||
for (String mode : MODES) {
|
||||
processMode(network, chunks, postcodes.codes(), originLats, originLons,
|
||||
nDest, outDir, mode, today, pool, threadConn);
|
||||
processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(),
|
||||
originNames, originLats, originLons, outDir, mode, today, pool, threadConn);
|
||||
}
|
||||
} finally {
|
||||
pool.shutdown();
|
||||
|
|
@ -75,17 +75,19 @@ public class App {
|
|||
}
|
||||
|
||||
private static void processMode(
|
||||
TransportNetwork network, List<Router.DestinationChunk> chunks,
|
||||
String[] postcodes, double[] originLats, double[] originLons, int nDest,
|
||||
TransportNetwork network,
|
||||
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
||||
String[] originNames, double[] originLats, double[] originLons,
|
||||
Path outDir, String mode, LocalDate date,
|
||||
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
|
||||
|
||||
int nOrigins = originLats.length;
|
||||
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
|
||||
System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode));
|
||||
Path modeDir = outDir.resolve(mode);
|
||||
Files.createDirectories(modeDir);
|
||||
|
||||
List<Integer> remaining = findRemaining(modeDir, nOrigins);
|
||||
List<Integer> remaining = findRemaining(modeDir, originNames);
|
||||
int alreadyDone = nOrigins - remaining.size();
|
||||
System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size());
|
||||
|
||||
|
|
@ -121,12 +123,13 @@ public class App {
|
|||
for (int idx : remaining) {
|
||||
pool.submit(() -> {
|
||||
try {
|
||||
processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
|
||||
nDest, modeDir, mode, date, idx, threadConn.get());
|
||||
processOrigin(network, postcodes, postcodeLats, postcodeLons,
|
||||
originLats[idx], originLons[idx],
|
||||
modeDir, mode, date, originNames[idx], threadConn.get());
|
||||
completed.incrementAndGet();
|
||||
} catch (Exception e) {
|
||||
failed.incrementAndGet();
|
||||
System.err.printf("%n [FAIL] origin %d: %s%n", idx, e.getMessage());
|
||||
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage());
|
||||
} finally {
|
||||
latch.countDown();
|
||||
}
|
||||
|
|
@ -144,24 +147,43 @@ public class App {
|
|||
|
||||
/** Compute and write travel times for a single origin, with retry on failure. */
|
||||
private static void processOrigin(
|
||||
TransportNetwork network, List<Router.DestinationChunk> chunks,
|
||||
String[] postcodes, double lat, double lon, int nDest,
|
||||
Path modeDir, String mode, LocalDate date, int idx,
|
||||
TransportNetwork network,
|
||||
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
||||
double originLat, double originLon,
|
||||
Path modeDir, String mode, LocalDate date, String name,
|
||||
DuckDBConnection conn) throws Exception {
|
||||
|
||||
Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
|
||||
Path outPath = modeDir.resolve(sanitizeFilename(name) + ".parquet");
|
||||
Exception lastError = null;
|
||||
|
||||
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
|
||||
Parquet.writeTravelTimes(conn, outPath, postcodes, times);
|
||||
Router.FilteredResult result = Router.computeForOrigin(
|
||||
network, postcodeLats, postcodeLons,
|
||||
originLat, originLon, mode, date);
|
||||
|
||||
// Write only reachable postcodes (sparse output)
|
||||
int reachable = 0;
|
||||
for (short t : result.times()) if (t >= 0) reachable++;
|
||||
|
||||
String[] codes = new String[reachable];
|
||||
short[] times = new short[reachable];
|
||||
int j = 0;
|
||||
for (int i = 0; i < result.times().length; i++) {
|
||||
if (result.times()[i] >= 0) {
|
||||
codes[j] = postcodes[result.originalIndices()[i]];
|
||||
times[j] = result.times()[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
Parquet.writeTravelTimes(conn, outPath, codes, times);
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
lastError = e;
|
||||
if (attempt < MAX_RETRIES) {
|
||||
System.err.printf("%n [RETRY %d/%d] origin %d: %s%n",
|
||||
attempt + 1, MAX_RETRIES, idx, e.getMessage());
|
||||
System.err.printf("%n [RETRY %d/%d] %s: %s%n",
|
||||
attempt + 1, MAX_RETRIES, name, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -169,10 +191,10 @@ public class App {
|
|||
}
|
||||
|
||||
/** Find origin indices that don't yet have output parquet files. */
|
||||
private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
|
||||
private static List<Integer> findRemaining(Path modeDir, String[] names) throws Exception {
|
||||
List<Integer> remaining = new ArrayList<>();
|
||||
for (int i = 0; i < nOrigins; i++) {
|
||||
Path f = modeDir.resolve(String.format("%06d.parquet", i));
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
Path f = modeDir.resolve(sanitizeFilename(names[i]) + ".parquet");
|
||||
if (!Files.exists(f) || Files.size(f) == 0) {
|
||||
remaining.add(i);
|
||||
}
|
||||
|
|
@ -180,6 +202,13 @@ public class App {
|
|||
return remaining;
|
||||
}
|
||||
|
||||
/** Sanitize a place name into a safe filename (lowercase, spaces to hyphens, strip non-alphanumeric). */
|
||||
private static String sanitizeFilename(String name) {
|
||||
return name.toLowerCase()
|
||||
.replaceAll("[^a-z0-9 -]", "")
|
||||
.replaceAll("\\s+", "-");
|
||||
}
|
||||
|
||||
private static String requiredArg(String[] args, String name) {
|
||||
for (int i = 0; i < args.length - 1; i++) {
|
||||
if (args[i].equals(name)) return args[i + 1];
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import java.util.List;
|
|||
public class Parquet {
|
||||
|
||||
/**
 * Postcode table held as three arrays: postcode strings, latitudes and longitudes.
 * NOTE(review): callers index {@code codes} with positions taken from {@code lats}/{@code lons},
 * so the arrays are presumably parallel (same length, same row order) — confirm in the loader.
 */
record Postcodes(String[] codes, double[] lats, double[] lons) {}
|
||||
/**
 * Origin places held as three arrays: place names, latitudes and longitudes.
 * Element {@code i} of each array comes from the same row of the places query.
 */
record Places(String[] names, double[] lats, double[] lons) {}
|
||||
|
||||
static {
|
||||
try { Class.forName("org.duckdb.DuckDBDriver"); }
|
||||
|
|
@ -26,7 +27,7 @@ public class Parquet {
|
|||
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
|
||||
+ parquetPath + "') WHERE ctry = 'E92000001'");
|
||||
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
|
||||
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
|
||||
|
|
@ -50,8 +51,8 @@ public class Parquet {
|
|||
}
|
||||
}
|
||||
|
||||
/** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */
|
||||
static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
|
||||
/** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */
|
||||
static Places loadPlaces(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
|
||||
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
|
||||
|
|
@ -61,19 +62,20 @@ public class Parquet {
|
|||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
|
||||
rs.next();
|
||||
int n = rs.getInt(1);
|
||||
// Return as [lats, lons] flat arrays
|
||||
String[] names = new String[n];
|
||||
double[] lats = new double[n];
|
||||
double[] lons = new double[n];
|
||||
|
||||
try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
|
||||
try (ResultSet data = stmt.executeQuery("SELECT name, lat, lon FROM places")) {
|
||||
int i = 0;
|
||||
while (data.next()) {
|
||||
lats[i] = data.getDouble(1);
|
||||
lons[i] = data.getDouble(2);
|
||||
names[i] = data.getString(1);
|
||||
lats[i] = data.getDouble(2);
|
||||
lons[i] = data.getDouble(3);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return new double[][]{lats, lons};
|
||||
return new Places(names, lats, lons);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,17 +20,25 @@ import java.util.Arrays;
|
|||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
/** R5 routing: network loading, point set construction, travel time computation. */
|
||||
/** R5 routing: network loading, spatial filtering, travel time computation. */
|
||||
public class Router {
|
||||
|
||||
private static final int ZOOM = 9;
|
||||
private static final int ZOOM = 9; // R5 enforces range 9-12
|
||||
private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
|
||||
|
||||
/**
|
||||
* A chunk of destinations that fits within R5's grid cell limit at zoom 9.
|
||||
* originalIndices maps each position in this chunk back to the full destinations array.
|
||||
*/
|
||||
record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
|
||||
/** Result of computing travel times for a single origin with spatial pre-filtering. */
|
||||
record FilteredResult(int[] originalIndices, short[] times) {}
|
||||
|
||||
/** Max plausible travel radius in km for 120-minute trips. */
|
||||
static double maxRadiusKm(String mode) {
|
||||
return switch (mode) {
|
||||
case "car" -> 150;
|
||||
case "transit" -> 150;
|
||||
case "bicycle" -> 60;
|
||||
case "walking" -> 12;
|
||||
default -> throw new IllegalArgumentException("Unknown mode: " + mode);
|
||||
};
|
||||
}
|
||||
|
||||
/** Load or build the transport network with Kryo caching. */
|
||||
static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
|
||||
|
|
@ -56,10 +64,80 @@ public class Router {
|
|||
}
|
||||
|
||||
/**
|
||||
* Split destinations into geographic chunks that each fit within R5's grid cell limit.
|
||||
* Sorts by latitude and splits into bands so each band's bounding box at zoom 9 is under 5M cells.
|
||||
* Filter destinations by distance, build chunks, compute travel times for one origin.
|
||||
* Returns only the filtered subset indices and their travel times.
|
||||
*/
|
||||
static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
|
||||
static FilteredResult computeForOrigin(
|
||||
TransportNetwork network,
|
||||
double[] allLats, double[] allLons,
|
||||
double originLat, double originLon,
|
||||
String mode, LocalDate date) {
|
||||
|
||||
double maxRadius = maxRadiusKm(mode);
|
||||
|
||||
// 1. Filter destinations by bounding box
|
||||
int[] filtered = filterByDistance(allLats, allLons, originLat, originLon, maxRadius);
|
||||
if (filtered.length == 0) {
|
||||
return new FilteredResult(new int[0], new short[0]);
|
||||
}
|
||||
|
||||
// 2. Extract filtered coordinate arrays
|
||||
double[] fLats = new double[filtered.length];
|
||||
double[] fLons = new double[filtered.length];
|
||||
for (int i = 0; i < filtered.length; i++) {
|
||||
fLats[i] = allLats[filtered[i]];
|
||||
fLons[i] = allLons[filtered[i]];
|
||||
}
|
||||
|
||||
// 3. Build chunks from filtered destinations
|
||||
List<DestinationChunk> chunks = buildDestinationChunks(fLats, fLons);
|
||||
|
||||
// 4. Compute travel times
|
||||
short[] times = computeTravelTimes(network, chunks, originLat, originLon, mode, fLats.length, date);
|
||||
|
||||
return new FilteredResult(filtered, times);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter destination indices to those within a bounding box of maxRadiusKm from origin.
|
||||
* Uses degree-based approximation — slightly overestimates at corners, which is fine.
|
||||
*/
|
||||
private static int[] filterByDistance(
|
||||
double[] lats, double[] lons,
|
||||
double originLat, double originLon,
|
||||
double maxRadiusKm) {
|
||||
|
||||
double degLat = maxRadiusKm / 111.0;
|
||||
double degLon = maxRadiusKm / (111.0 * Math.cos(Math.toRadians(originLat)));
|
||||
|
||||
double minLat = originLat - degLat;
|
||||
double maxLat = originLat + degLat;
|
||||
double minLon = originLon - degLon;
|
||||
double maxLon = originLon + degLon;
|
||||
|
||||
// Two-pass: count then fill (avoids ArrayList/boxing overhead)
|
||||
int count = 0;
|
||||
for (int i = 0; i < lats.length; i++) {
|
||||
if (lats[i] >= minLat && lats[i] <= maxLat && lons[i] >= minLon && lons[i] <= maxLon) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
int[] result = new int[count];
|
||||
int j = 0;
|
||||
for (int i = 0; i < lats.length; i++) {
|
||||
if (lats[i] >= minLat && lats[i] <= maxLat && lons[i] >= minLon && lons[i] <= maxLon) {
|
||||
result[j++] = i;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split destinations into geographic chunks that each fit within R5's grid cell limit.
|
||||
* Sorts by latitude and splits into bands so each band's bounding box is under 5M cells.
|
||||
*/
|
||||
private static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
|
||||
int n = lats.length;
|
||||
|
||||
// Sort indices by latitude for geographic chunking
|
||||
|
|
@ -94,13 +172,11 @@ public class Router {
|
|||
start = end;
|
||||
}
|
||||
|
||||
System.err.printf(" Split into %d chunks at zoom %d (grid width %d, max height %d)%n",
|
||||
chunks.size(), ZOOM, gridWidth, maxHeight);
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/** Compute travel times from one origin to all destinations across all chunks. */
|
||||
static short[] computeTravelTimes(
|
||||
private static short[] computeTravelTimes(
|
||||
TransportNetwork network, List<DestinationChunk> chunks,
|
||||
double originLat, double originLon, String mode, int nDest, LocalDate date) {
|
||||
|
||||
|
|
@ -125,6 +201,10 @@ public class Router {
|
|||
return times;
|
||||
}
|
||||
|
||||
// --- Private helpers ---
|
||||
|
||||
/**
 * One group of destinations together with the point set and extents used to route to it.
 * NOTE(review): originalIndices presumably maps each position in the chunk back to the
 * coordinate arrays given to buildDestinationChunks (here the pre-filtered subset, not the
 * full postcode table) — confirm against buildChunk.
 */
private record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
|
||||
|
||||
private static DestinationChunk buildChunk(
|
||||
double[] lats, double[] lons, Integer[] sorted, int start, int end) {
|
||||
int size = end - start;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue