package propertymap; import com.conveyal.r5.transit.TransportNetwork; import org.duckdb.DuckDBConnection; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; /** * Batch-compute travel times from each origin (place) to nearby postcodes * for all transport modes (car, bicycle, walking, transit). * * Each origin is spatially pre-filtered to only route to postcodes within a * plausible travel radius for the mode. Output is sparse: only reachable * postcodes are written (unreachable = absent from file). * * Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet * with columns (pcds VARCHAR, travel_minutes SMALLINT). Transit mode additionally * includes a best_minutes SMALLINT column (5th percentile = best-case departure timing). */ public class App { private static final String[] MODES = {"bicycle", "transit", "walking", "car"}; private static final int MAX_RETRIES = 2; public static void main(String[] args) throws Exception { String postcodesPath = requiredArg(args, "--postcodes"); String placesPath = requiredArg(args, "--places"); String outputDirStr = requiredArg(args, "--output-dir"); int threads = Integer.parseInt(optionalArg(args, "--threads", "4")); Path outDir = Paths.get(outputDirStr); Files.createDirectories(outDir); LocalDate today = LocalDate.now(); TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR")); System.err.println("Loading postcodes (England only)..."); Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes( postcodesPath, outDir.resolve("postcodes_ref.parquet")); System.err.printf(" %,d postcodes%n", postcodes.lats().length); System.err.println("Loading places (deduplicated)..."); Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet")); String[] originNames = places.names(); double[] originLats = places.lats(), originLons = places.lons(); int nOrigins = originLats.length; System.err.printf(" %,d places%n", nOrigins); // One thread pool shared across all modes ExecutorService pool = Executors.newFixedThreadPool(threads); // One DuckDB connection per thread, reused across all writes ThreadLocal threadConn = ThreadLocal.withInitial(() -> { try { return Parquet.connect(); } catch (Exception e) { throw new RuntimeException(e); } }); try { for (String mode : MODES) { processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(), originNames, originLats, originLons, outDir, mode, today, pool, threadConn); } } finally { pool.shutdown(); pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); } } private static void processMode( TransportNetwork network, String[] postcodes, double[] postcodeLats, double[] postcodeLons, String[] originNames, double[] originLats, double[] originLons, Path outDir, String mode, LocalDate date, ExecutorService pool, ThreadLocal threadConn) throws Exception { int nOrigins = originLats.length; System.err.printf("%n=== %s ===%n", mode.toUpperCase()); System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode)); Path modeDir = outDir.resolve(mode); Files.createDirectories(modeDir); List remaining = findRemaining(modeDir, originNames); int alreadyDone = nOrigins - remaining.size(); System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size()); if (remaining.isEmpty()) { System.err.println(" All origins completed for this mode!"); return; } long startMs = System.currentTimeMillis(); int total = remaining.size(); AtomicInteger completed = new AtomicInteger(0); AtomicInteger failed = new AtomicInteger(0); // Progress reporter on a timer instead of per-task stderr writes ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> { Thread t = new Thread(r, "progress"); t.setDaemon(true); return t; }); reporter.scheduleAtFixedRate(() -> { int c = completed.get(); if (c == 0) return; double secs = (System.currentTimeMillis() - startMs) / 1000.0; double rate = c / secs; double etaH = (total - c) / rate / 3600; System.err.printf("\r [%,d/%,d] %.1f/s | ETA %.1fh | fail %d", c, total, rate, etaH, failed.get()); }, 2, 2, TimeUnit.SECONDS); CountDownLatch latch = new CountDownLatch(remaining.size()); for (int idx : remaining) { pool.submit(() -> { try { processOrigin(network, postcodes, postcodeLats, postcodeLons, originLats[idx], originLons[idx], modeDir, mode, date, idx, originNames[idx], threadConn.get()); completed.incrementAndGet(); } catch (Exception e) { failed.incrementAndGet(); System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage()); } finally { latch.countDown(); } }); } latch.await(); reporter.shutdown(); reporter.awaitTermination(5, TimeUnit.SECONDS); double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0; int n = completed.get(); System.err.printf("\r [%,d/%,d] %.1f/s | %.1fh | fail %d%n", n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get()); } /** Compute and write travel times for a single origin, with retry on failure. */ private static void processOrigin( TransportNetwork network, String[] postcodes, double[] postcodeLats, double[] postcodeLons, double originLat, double originLon, Path modeDir, String mode, LocalDate date, int index, String name, DuckDBConnection conn) throws Exception { Path outPath = modeDir.resolve(originFilename(index, name)); Exception lastError = null; for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { Router.FilteredResult result = Router.computeForOrigin( network, postcodeLats, postcodeLons, originLat, originLon, mode, date); // Write only reachable postcodes (sparse output) int reachable = 0; for (short t : result.times()) if (t >= 0) reachable++; String[] codes = new String[reachable]; short[] times = new short[reachable]; short[] bestTimes = result.bestTimes() != null ? new short[reachable] : null; int j = 0; for (int i = 0; i < result.times().length; i++) { if (result.times()[i] >= 0) { codes[j] = postcodes[result.originalIndices()[i]]; times[j] = result.times()[i]; if (bestTimes != null) bestTimes[j] = result.bestTimes()[i]; j++; } } if (bestTimes != null) { Parquet.writeTransitTravelTimes(conn, outPath, codes, times, bestTimes); } else { Parquet.writeTravelTimes(conn, outPath, codes, times); } return; } catch (Exception e) { lastError = e; if (attempt < MAX_RETRIES) { System.err.printf("%n [RETRY %d/%d] %s: %s%n", attempt + 1, MAX_RETRIES, name, e.getMessage()); } else { System.err.printf("%n [FAIL TRACE] %s:%n", name); e.printStackTrace(System.err); } } } throw lastError; } /** Find origin indices that don't yet have output parquet files. */ private static List findRemaining(Path modeDir, String[] names) throws Exception { List remaining = new ArrayList<>(); for (int i = 0; i < names.length; i++) { Path f = modeDir.resolve(originFilename(i, names[i])); if (!Files.exists(f) || Files.size(f) == 0) { remaining.add(i); } } return remaining; } /** Build a filename from index + place name (index prefix prevents collisions after sanitization). */ private static String originFilename(int index, String name) { String safe = name.toLowerCase() .replaceAll("[^a-z0-9 -]", "") .replaceAll("\\s+", "-"); return String.format("%06d-%s.parquet", index, safe); } private static String requiredArg(String[] args, String name) { for (int i = 0; i < args.length - 1; i++) { if (args[i].equals(name)) return args[i + 1]; } System.err.println("Missing required argument: " + name); System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N]"); System.exit(1); return null; // unreachable } private static String optionalArg(String[] args, String name, String defaultValue) { for (int i = 0; i < args.length - 1; i++) { if (args[i].equals(name)) return args[i + 1]; } return defaultValue; } private static String requiredEnv(String name) { String val = System.getenv(name); if (val == null) { System.err.println("Missing required environment variable: " + name); System.exit(1); } return val; } }