package propertymap; import com.conveyal.r5.transit.TransportNetwork; import org.duckdb.DuckDBConnection; import java.io.IOException; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; /** * Batch-compute travel times from each origin (place) to nearby postcodes * for all transport modes (car, bicycle, walking, transit). * * Each origin is spatially pre-filtered to only route to postcodes within a * plausible travel radius for the mode. Output is sparse: only reachable * postcodes are written (unreachable = absent from file). * * Output per mode: one parquet file per origin in {output-dir}/{mode}/{name}.parquet * with columns (pcds VARCHAR, travel_minutes SMALLINT). Transit mode additionally * includes a best_minutes SMALLINT column (5th percentile = best-case departure timing) * and a journey VARCHAR column with JSON leg instructions. */ public class App { private static final String[] MODES = {"bicycle", "transit", "walking", "car"}; private static final String[] DEMO_MODES = {"transit"}; private static final Set DEMO_PLACES = Set.of( "Bank tube station", "Tottenham Court Road tube station"); private static final int MAX_RETRIES = 2; public static void main(String[] args) throws Exception { String postcodesPath = requiredArg(args, "--postcodes"); String placesPath = requiredArg(args, "--places"); String outputDirStr = requiredArg(args, "--output-dir"); int threads = Integer.parseInt(optionalArg(args, "--threads", "4")); boolean enablePaths = true; boolean demo = hasFlag(args, "--demo"); Path outDir = Paths.get(outputDirStr); Files.createDirectories(outDir); LocalDate today = LocalDate.now(); TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR")); Router.validateTransitServices(network, today); System.err.println("Loading postcodes (England only)..."); Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes( postcodesPath, outDir.resolve("postcodes_ref.parquet")); System.err.printf(" %,d postcodes%n", postcodes.lats().length); System.err.println("Loading places (deduplicated)..."); Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet")); String[] originNames = places.names(); double[] originLats = places.lats(), originLons = places.lons(); int nOrigins = originLats.length; System.err.printf(" %,d travel-eligible places%n", nOrigins); // Filter places to England only (must be near at least one England postcode) Set englandIndices = filterEnglandPlaces( originLats, originLons, postcodes.lats(), postcodes.lons()); int excluded = nOrigins - englandIndices.size(); if (excluded > 0) { System.err.printf(" %,d places excluded (non-England), %,d remaining%n", excluded, englandIndices.size()); } // In demo mode, filter to just Bank + TCR and transit only int[] originIndices; String[] modes; if (demo) { List demoIdx = new ArrayList<>(); for (int i = 0; i < nOrigins; i++) { if (DEMO_PLACES.contains(originNames[i])) demoIdx.add(i); } originIndices = demoIdx.stream().mapToInt(Integer::intValue).toArray(); modes = DEMO_MODES; System.err.printf("DEMO MODE: %d places (transit only)%n", originIndices.length); for (int i : originIndices) System.err.printf(" - %s%n", originNames[i]); } else { // Normal mode: use all travel-eligible England places originIndices = englandIndices.stream().sorted() .mapToInt(Integer::intValue).toArray(); modes = MODES; } // One thread pool shared across all modes ExecutorService pool = Executors.newFixedThreadPool(threads); // One DuckDB connection per thread, reused across all writes ThreadLocal threadConn = ThreadLocal.withInitial(() -> { try { return Parquet.connect(); } catch (Exception e) { throw new RuntimeException(e); } }); if (enablePaths) System.err.println("Path recording ENABLED (transit only, ~20x slower)"); try { for (String mode : modes) { processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(), originNames, originLats, originLons, outDir, mode, today, pool, threadConn, enablePaths, originIndices, !demo); } } finally { pool.shutdown(); pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); } } /** * @param originIndices origin indices to process. * @param skipCompleted if true, skip origins that already have output files. */ private static void processMode( TransportNetwork network, String[] postcodes, double[] postcodeLats, double[] postcodeLons, String[] originNames, double[] originLats, double[] originLons, Path outDir, String mode, LocalDate date, ExecutorService pool, ThreadLocal threadConn, boolean enablePaths, int[] originIndices, boolean skipCompleted) throws Exception { System.err.printf("%n=== %s ===%n", mode.toUpperCase()); System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode)); Path modeDir = outDir.resolve(mode); Files.createDirectories(modeDir); // Scan existing slugs once (O(directory)) instead of per-origin stat calls. // This matches by slug regardless of numeric prefix, so re-indexed places.parquet // won't cause duplicate computation. Set existingSlugs = skipCompleted ? scanExistingSlugs(modeDir) : Set.of(); List remaining = new ArrayList<>(); for (int idx : originIndices) { if (skipCompleted && existingSlugs.contains(slugFromName(originNames[idx]))) { continue; } remaining.add(idx); } int alreadyDone = originIndices.length - remaining.size(); System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size()); if (remaining.isEmpty()) { System.err.println(" All origins completed for this mode!"); return; } long startMs = System.currentTimeMillis(); int total = remaining.size(); AtomicInteger completed = new AtomicInteger(0); AtomicInteger failed = new AtomicInteger(0); // Progress reporter on a timer instead of per-task stderr writes ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> { Thread t = new Thread(r, "progress"); t.setDaemon(true); return t; }); reporter.scheduleAtFixedRate(() -> { int c = completed.get(); if (c == 0) return; double secs = (System.currentTimeMillis() - startMs) / 1000.0; double rate = c / secs; double etaH = (total - c) / rate / 3600; System.err.printf("\r [%,d/%,d] %.1f/s | ETA %.1fh | fail %d", c, total, rate, etaH, failed.get()); }, 2, 2, TimeUnit.SECONDS); CountDownLatch latch = new CountDownLatch(remaining.size()); for (int idx : remaining) { pool.submit(() -> { try { processOrigin(network, postcodes, postcodeLats, postcodeLons, originLats[idx], originLons[idx], modeDir, mode, date, idx, originNames[idx], threadConn.get(), enablePaths); completed.incrementAndGet(); } catch (Exception e) { failed.incrementAndGet(); System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage()); } finally { latch.countDown(); } }); } latch.await(); reporter.shutdown(); reporter.awaitTermination(5, TimeUnit.SECONDS); double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0; int n = completed.get(); System.err.printf("\r [%,d/%,d] %.1f/s | %.1fh | fail %d%n", n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get()); int failures = failed.get(); if (failures > 0) { throw new IllegalStateException(String.format( "%s travel-time generation failed for %,d of %,d origins; outputs are incomplete", mode, failures, total)); } } /** Compute and write travel times for a single origin, with retry on failure. */ private static void processOrigin( TransportNetwork network, String[] postcodes, double[] postcodeLats, double[] postcodeLons, double originLat, double originLon, Path modeDir, String mode, LocalDate date, int index, String name, DuckDBConnection conn, boolean enablePaths) throws Exception { Path outPath = modeDir.resolve(originFilename(index, name)); Exception lastError = null; for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { Router.FilteredResult result = Router.computeForOrigin( network, postcodeLats, postcodeLons, originLat, originLon, mode, date, enablePaths); // Write only reachable postcodes (sparse output) int reachable = 0; for (short t : result.times()) if (t >= 0) reachable++; String[] codes = new String[reachable]; short[] times = new short[reachable]; short[] bestTimes = result.bestTimes() != null ? new short[reachable] : null; String[] journeys = result.journeys() != null ? new String[reachable] : null; int j = 0; for (int i = 0; i < result.times().length; i++) { if (result.times()[i] >= 0) { codes[j] = postcodes[result.originalIndices()[i]]; times[j] = result.times()[i]; if (bestTimes != null) bestTimes[j] = result.bestTimes()[i]; if (journeys != null) journeys[j] = result.journeys()[i]; // may be null for some postcodes j++; } } if (bestTimes != null) { Parquet.writeTransitTravelTimes(conn, outPath, codes, times, bestTimes, journeys); } else { Parquet.writeTravelTimes(conn, outPath, codes, times); } return; } catch (Exception e) { lastError = e; if (attempt < MAX_RETRIES) { System.err.printf("%n [RETRY %d/%d] %s: %s%n", attempt + 1, MAX_RETRIES, name, e.getMessage()); } else { System.err.printf("%n [FAIL TRACE] %s:%n", name); e.printStackTrace(System.err); } } } throw lastError; } /** Build a filename from index + place name (index prefix prevents collisions after sanitization). */ private static String originFilename(int index, String name) { return String.format("%06d-%s.parquet", index, slugFromName(name)); } /** Slugify a place name: lowercase, strip non-alphanumeric (except spaces/hyphens), collapse whitespace. */ private static String slugFromName(String name) { return name.toLowerCase() .replaceAll("[^a-z0-9 -]", "") .replaceAll("\\s+", "-"); } /** * Scan a mode directory for existing non-empty parquet files, returning the set of slugs * (filenames with numeric prefix stripped). This allows resume to work across places.parquet * rebuilds where indices change but slugs stay the same. */ private static Set scanExistingSlugs(Path modeDir) throws IOException { Set slugs = new HashSet<>(); if (!Files.isDirectory(modeDir)) return slugs; try (DirectoryStream stream = Files.newDirectoryStream(modeDir, "*.parquet")) { for (Path p : stream) { if (Files.size(p) > 0) { String stem = p.getFileName().toString().replace(".parquet", ""); int dash = stem.indexOf('-'); if (dash > 0 && stem.substring(0, dash).chars().allMatch(Character::isDigit)) { slugs.add(stem.substring(dash + 1)); } else { slugs.add(stem); } } } } return slugs; } private static String requiredArg(String[] args, String name) { for (int i = 0; i < args.length - 1; i++) { if (args[i].equals(name)) return args[i + 1]; } System.err.println("Missing required argument: " + name); System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N] [--demo]"); System.exit(1); return null; // unreachable } private static boolean hasFlag(String[] args, String name) { for (String arg : args) { if (arg.equals(name)) return true; } return false; } private static String optionalArg(String[] args, String name, String defaultValue) { for (int i = 0; i < args.length - 1; i++) { if (args[i].equals(name)) return args[i + 1]; } return defaultValue; } /** * Filter place indices to those near at least one England postcode. * Uses a 0.1° grid (~11km cells) built from postcode locations — a place is kept * if its grid cell or any adjacent cell contains an England postcode. */ private static Set filterEnglandPlaces( double[] placeLats, double[] placeLons, double[] postcodeLats, double[] postcodeLons) { // Build grid of cells that contain at least one England postcode Set postcodeCells = new HashSet<>(); for (int i = 0; i < postcodeLats.length; i++) { postcodeCells.add(gridCell(postcodeLats[i], postcodeLons[i])); } // Keep places where the place's cell or any adjacent cell has postcodes Set keep = new HashSet<>(); for (int i = 0; i < placeLats.length; i++) { int gLat = (int) Math.floor(placeLats[i] * 10); int gLon = (int) Math.floor(placeLons[i] * 10); outer: for (int dLat = -1; dLat <= 1; dLat++) { for (int dLon = -1; dLon <= 1; dLon++) { if (postcodeCells.contains(packGrid(gLat + dLat, gLon + dLon))) { keep.add(i); break outer; } } } } return keep; } private static long gridCell(double lat, double lon) { return packGrid((int) Math.floor(lat * 10), (int) Math.floor(lon * 10)); } private static long packGrid(int gLat, int gLon) { return ((long) gLat << 32) | (gLon & 0xFFFFFFFFL); } private static String requiredEnv(String name) { String val = System.getenv(name); if (val == null) { System.err.println("Missing required environment variable: " + name); System.exit(1); } return val; } }