This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@ -16,15 +16,19 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Batch-compute travel times from each origin (place) to all destinations (postcodes)
* Batch-compute travel times from each origin (place) to nearby postcodes
* for all transport modes (car, bicycle, walking, transit).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
* Each origin is spatially pre-filtered to only route to postcodes within a
* plausible travel radius for the mode. Output is sparse: only reachable
* postcodes are written (unreachable = absent from file).
*
* Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet
* with columns (pcds VARCHAR, travel_minutes SMALLINT).
*/
public class App {
private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
private static final String[] MODES = {"bicycle", "walking", "transit", "car"};
private static final int MAX_RETRIES = 2;
public static void main(String[] args) throws Exception {
@ -42,18 +46,14 @@ public class App {
System.err.println("Loading postcodes (England only)...");
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
int nDest = postcodes.lats().length;
System.err.printf(" %,d postcodes%n", nDest);
List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());
System.err.printf(" %,d postcodes%n", postcodes.lats().length);
System.err.println("Loading places (deduplicated)...");
double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
String[] originNames = places.names();
double[] originLats = places.lats(), originLons = places.lons();
int nOrigins = originLats.length;
System.err.printf(" %,d places%n", nOrigins);
System.err.printf(" Estimated output: %.1f GB (%,d x %,d x 2B)%n",
(double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);
// One thread pool shared across all modes
ExecutorService pool = Executors.newFixedThreadPool(threads);
@ -65,8 +65,8 @@ public class App {
try {
for (String mode : MODES) {
processMode(network, chunks, postcodes.codes(), originLats, originLons,
nDest, outDir, mode, today, pool, threadConn);
processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(),
originNames, originLats, originLons, outDir, mode, today, pool, threadConn);
}
} finally {
pool.shutdown();
@ -75,17 +75,19 @@ public class App {
}
private static void processMode(
TransportNetwork network, List<Router.DestinationChunk> chunks,
String[] postcodes, double[] originLats, double[] originLons, int nDest,
TransportNetwork network,
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
String[] originNames, double[] originLats, double[] originLons,
Path outDir, String mode, LocalDate date,
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
int nOrigins = originLats.length;
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode));
Path modeDir = outDir.resolve(mode);
Files.createDirectories(modeDir);
List<Integer> remaining = findRemaining(modeDir, nOrigins);
List<Integer> remaining = findRemaining(modeDir, originNames);
int alreadyDone = nOrigins - remaining.size();
System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size());
@ -121,12 +123,13 @@ public class App {
for (int idx : remaining) {
pool.submit(() -> {
try {
processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
nDest, modeDir, mode, date, idx, threadConn.get());
processOrigin(network, postcodes, postcodeLats, postcodeLons,
originLats[idx], originLons[idx],
modeDir, mode, date, originNames[idx], threadConn.get());
completed.incrementAndGet();
} catch (Exception e) {
failed.incrementAndGet();
System.err.printf("%n [FAIL] origin %d: %s%n", idx, e.getMessage());
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage());
} finally {
latch.countDown();
}
@ -144,24 +147,43 @@ public class App {
/** Compute and write travel times for a single origin, with retry on failure. */
private static void processOrigin(
TransportNetwork network, List<Router.DestinationChunk> chunks,
String[] postcodes, double lat, double lon, int nDest,
Path modeDir, String mode, LocalDate date, int idx,
TransportNetwork network,
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
double originLat, double originLon,
Path modeDir, String mode, LocalDate date, String name,
DuckDBConnection conn) throws Exception {
Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
Path outPath = modeDir.resolve(sanitizeFilename(name) + ".parquet");
Exception lastError = null;
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
Parquet.writeTravelTimes(conn, outPath, postcodes, times);
Router.FilteredResult result = Router.computeForOrigin(
network, postcodeLats, postcodeLons,
originLat, originLon, mode, date);
// Write only reachable postcodes (sparse output)
int reachable = 0;
for (short t : result.times()) if (t >= 0) reachable++;
String[] codes = new String[reachable];
short[] times = new short[reachable];
int j = 0;
for (int i = 0; i < result.times().length; i++) {
if (result.times()[i] >= 0) {
codes[j] = postcodes[result.originalIndices()[i]];
times[j] = result.times()[i];
j++;
}
}
Parquet.writeTravelTimes(conn, outPath, codes, times);
return;
} catch (Exception e) {
lastError = e;
if (attempt < MAX_RETRIES) {
System.err.printf("%n [RETRY %d/%d] origin %d: %s%n",
attempt + 1, MAX_RETRIES, idx, e.getMessage());
System.err.printf("%n [RETRY %d/%d] %s: %s%n",
attempt + 1, MAX_RETRIES, name, e.getMessage());
}
}
}
@ -169,10 +191,10 @@ public class App {
}
/** Find origin indices that don't yet have output parquet files. */
private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
private static List<Integer> findRemaining(Path modeDir, String[] names) throws Exception {
List<Integer> remaining = new ArrayList<>();
for (int i = 0; i < nOrigins; i++) {
Path f = modeDir.resolve(String.format("%06d.parquet", i));
for (int i = 0; i < names.length; i++) {
Path f = modeDir.resolve(sanitizeFilename(names[i]) + ".parquet");
if (!Files.exists(f) || Files.size(f) == 0) {
remaining.add(i);
}
@ -180,6 +202,13 @@ public class App {
return remaining;
}
/**
 * Converts a place name into the base name of its output file.
 *
 * <p>Steps, in order: lowercase the name, delete every character other than
 * {@code a-z}, {@code 0-9}, space and hyphen, then replace each run of spaces
 * with a single hyphen. Hyphens already present in the name are kept.
 *
 * <p>Lowercasing uses {@link java.util.Locale#ROOT} so the result does not depend
 * on the JVM's default locale. Under a Turkish default locale, for example,
 * {@code "I"} lowercases to a dotless i (U+0131), which the character filter would
 * then delete, giving a different file name for the same place.
 *
 * <p>NOTE(review): the mapping is not injective. Names that differ only in removed
 * characters (e.g. {@code "St. Ives"} and {@code "St Ives"}) produce the same
 * result, and a name containing no ASCII letters, digits, spaces or hyphens
 * produces an empty string. Callers that use the result as a unique key should
 * confirm the input names cannot collide this way.
 *
 * @param name place name; must not be {@code null}
 * @return the sanitized name, without a file extension
 */
private static String sanitizeFilename(String name) {
    // Locale.ROOT keeps the mapping stable across machines and runs, which matters
    // because the result is used to detect already-written output files.
    return name.toLowerCase(java.util.Locale.ROOT)
        .replaceAll("[^a-z0-9 -]", "")
        .replaceAll("\\s+", "-");
}
private static String requiredArg(String[] args, String name) {
for (int i = 0; i < args.length - 1; i++) {
if (args[i].equals(name)) return args[i + 1];