// File-viewer listing header (not part of the program): 378 lines, 16 KiB, Java.
package propertymap;
|
|
|
|
import com.conveyal.r5.transit.TransportNetwork;
import org.duckdb.DuckDBConnection;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
/**
 * Batch-computes travel times from each origin (place) to nearby postcodes
 * for all transport modes (car, bicycle, walking, transit).
 *
 * <p>Each origin is spatially pre-filtered so that it is only routed to postcodes within a
 * plausible travel radius for the mode. Output is sparse: only reachable
 * postcodes are written (an unreachable postcode is simply absent from the file).
 *
 * <p>Output per mode: one parquet file per origin at
 * {@code {output-dir}/{mode}/{index}-{slug}.parquet}, where the index is zero-padded to six
 * digits and the slug is derived from the place name. Columns are
 * (pcds VARCHAR, travel_minutes SMALLINT). Transit mode additionally
 * includes a best_minutes SMALLINT column (5th percentile = best-case departure timing)
 * and a journey VARCHAR column with JSON leg instructions.
 */
|
|
public class App {
|
|
|
|
private static final String[] MODES = {"bicycle", "transit", "walking", "car"};
|
|
private static final String[] DEMO_MODES = {"transit"};
|
|
private static final Set<String> DEMO_PLACES = Set.of(
|
|
"Bank tube station", "Tottenham Court Road tube station");
|
|
private static final int MAX_RETRIES = 2;
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
String postcodesPath = requiredArg(args, "--postcodes");
|
|
String placesPath = requiredArg(args, "--places");
|
|
String outputDirStr = requiredArg(args, "--output-dir");
|
|
int threads = Integer.parseInt(optionalArg(args, "--threads", "4"));
|
|
boolean enablePaths = true;
|
|
boolean demo = hasFlag(args, "--demo");
|
|
|
|
Path outDir = Paths.get(outputDirStr);
|
|
Files.createDirectories(outDir);
|
|
|
|
LocalDate today = LocalDate.now();
|
|
TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR"));
|
|
Router.validateTransitServices(network, today);
|
|
|
|
System.err.println("Loading postcodes (England only)...");
|
|
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
|
|
postcodesPath, outDir.resolve("postcodes_ref.parquet"));
|
|
System.err.printf(" %,d postcodes%n", postcodes.lats().length);
|
|
|
|
System.err.println("Loading places (deduplicated)...");
|
|
Parquet.Places places = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
|
|
String[] originNames = places.names();
|
|
double[] originLats = places.lats(), originLons = places.lons();
|
|
int nOrigins = originLats.length;
|
|
System.err.printf(" %,d travel-eligible places%n", nOrigins);
|
|
|
|
// Filter places to England only (must be near at least one England postcode)
|
|
Set<Integer> englandIndices = filterEnglandPlaces(
|
|
originLats, originLons, postcodes.lats(), postcodes.lons());
|
|
int excluded = nOrigins - englandIndices.size();
|
|
if (excluded > 0) {
|
|
System.err.printf(" %,d places excluded (non-England), %,d remaining%n",
|
|
excluded, englandIndices.size());
|
|
}
|
|
|
|
// In demo mode, filter to just Bank + TCR and transit only
|
|
int[] originIndices;
|
|
String[] modes;
|
|
if (demo) {
|
|
List<Integer> demoIdx = new ArrayList<>();
|
|
for (int i = 0; i < nOrigins; i++) {
|
|
if (DEMO_PLACES.contains(originNames[i])) demoIdx.add(i);
|
|
}
|
|
originIndices = demoIdx.stream().mapToInt(Integer::intValue).toArray();
|
|
modes = DEMO_MODES;
|
|
System.err.printf("DEMO MODE: %d places (transit only)%n", originIndices.length);
|
|
for (int i : originIndices) System.err.printf(" - %s%n", originNames[i]);
|
|
} else {
|
|
// Normal mode: use all travel-eligible England places
|
|
originIndices = englandIndices.stream().sorted()
|
|
.mapToInt(Integer::intValue).toArray();
|
|
modes = MODES;
|
|
}
|
|
|
|
// One thread pool shared across all modes
|
|
ExecutorService pool = Executors.newFixedThreadPool(threads);
|
|
// One DuckDB connection per thread, reused across all writes
|
|
ThreadLocal<DuckDBConnection> threadConn = ThreadLocal.withInitial(() -> {
|
|
try { return Parquet.connect(); }
|
|
catch (Exception e) { throw new RuntimeException(e); }
|
|
});
|
|
|
|
if (enablePaths) System.err.println("Path recording ENABLED (transit only, ~20x slower)");
|
|
|
|
try {
|
|
for (String mode : modes) {
|
|
processMode(network, postcodes.codes(), postcodes.lats(), postcodes.lons(),
|
|
originNames, originLats, originLons, outDir, mode, today, pool, threadConn, enablePaths,
|
|
originIndices, !demo);
|
|
}
|
|
} finally {
|
|
pool.shutdown();
|
|
pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param originIndices origin indices to process.
|
|
* @param skipCompleted if true, skip origins that already have output files.
|
|
*/
|
|
private static void processMode(
|
|
TransportNetwork network,
|
|
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
|
String[] originNames, double[] originLats, double[] originLons,
|
|
Path outDir, String mode, LocalDate date,
|
|
ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn,
|
|
boolean enablePaths, int[] originIndices, boolean skipCompleted) throws Exception {
|
|
|
|
System.err.printf("%n=== %s ===%n", mode.toUpperCase());
|
|
System.err.printf(" Radius: %.0f km%n", Router.maxRadiusKm(mode));
|
|
Path modeDir = outDir.resolve(mode);
|
|
Files.createDirectories(modeDir);
|
|
|
|
// Scan existing slugs once (O(directory)) instead of per-origin stat calls.
|
|
// This matches by slug regardless of numeric prefix, so re-indexed places.parquet
|
|
// won't cause duplicate computation.
|
|
Set<String> existingSlugs = skipCompleted ? scanExistingSlugs(modeDir) : Set.of();
|
|
|
|
List<Integer> remaining = new ArrayList<>();
|
|
for (int idx : originIndices) {
|
|
if (skipCompleted && existingSlugs.contains(slugFromName(originNames[idx]))) {
|
|
continue;
|
|
}
|
|
remaining.add(idx);
|
|
}
|
|
int alreadyDone = originIndices.length - remaining.size();
|
|
System.err.printf(" %,d done, %,d remaining%n", alreadyDone, remaining.size());
|
|
|
|
if (remaining.isEmpty()) {
|
|
System.err.println(" All origins completed for this mode!");
|
|
return;
|
|
}
|
|
|
|
long startMs = System.currentTimeMillis();
|
|
int total = remaining.size();
|
|
AtomicInteger completed = new AtomicInteger(0);
|
|
AtomicInteger failed = new AtomicInteger(0);
|
|
|
|
// Progress reporter on a timer instead of per-task stderr writes
|
|
ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> {
|
|
Thread t = new Thread(r, "progress");
|
|
t.setDaemon(true);
|
|
return t;
|
|
});
|
|
reporter.scheduleAtFixedRate(() -> {
|
|
int c = completed.get();
|
|
if (c == 0) return;
|
|
double secs = (System.currentTimeMillis() - startMs) / 1000.0;
|
|
double rate = c / secs;
|
|
double etaH = (total - c) / rate / 3600;
|
|
System.err.printf("\r [%,d/%,d] %.1f/s | ETA %.1fh | fail %d",
|
|
c, total, rate, etaH, failed.get());
|
|
}, 2, 2, TimeUnit.SECONDS);
|
|
|
|
CountDownLatch latch = new CountDownLatch(remaining.size());
|
|
|
|
for (int idx : remaining) {
|
|
pool.submit(() -> {
|
|
try {
|
|
processOrigin(network, postcodes, postcodeLats, postcodeLons,
|
|
originLats[idx], originLons[idx],
|
|
modeDir, mode, date, idx, originNames[idx], threadConn.get(), enablePaths);
|
|
completed.incrementAndGet();
|
|
} catch (Exception e) {
|
|
failed.incrementAndGet();
|
|
System.err.printf("%n [FAIL] origin %s: %s%n", originNames[idx], e.getMessage());
|
|
} finally {
|
|
latch.countDown();
|
|
}
|
|
});
|
|
}
|
|
|
|
latch.await();
|
|
reporter.shutdown();
|
|
reporter.awaitTermination(5, TimeUnit.SECONDS);
|
|
|
|
double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
|
|
int n = completed.get();
|
|
System.err.printf("\r [%,d/%,d] %.1f/s | %.1fh | fail %d%n",
|
|
n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get());
|
|
|
|
int failures = failed.get();
|
|
if (failures > 0) {
|
|
throw new IllegalStateException(String.format(
|
|
"%s travel-time generation failed for %,d of %,d origins; outputs are incomplete",
|
|
mode, failures, total));
|
|
}
|
|
}
|
|
|
|
/** Compute and write travel times for a single origin, with retry on failure. */
|
|
private static void processOrigin(
|
|
TransportNetwork network,
|
|
String[] postcodes, double[] postcodeLats, double[] postcodeLons,
|
|
double originLat, double originLon,
|
|
Path modeDir, String mode, LocalDate date, int index, String name,
|
|
DuckDBConnection conn, boolean enablePaths) throws Exception {
|
|
|
|
Path outPath = modeDir.resolve(originFilename(index, name));
|
|
Exception lastError = null;
|
|
|
|
for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
try {
|
|
Router.FilteredResult result = Router.computeForOrigin(
|
|
network, postcodeLats, postcodeLons,
|
|
originLat, originLon, mode, date, enablePaths);
|
|
|
|
// Write only reachable postcodes (sparse output)
|
|
int reachable = 0;
|
|
for (short t : result.times()) if (t >= 0) reachable++;
|
|
|
|
String[] codes = new String[reachable];
|
|
short[] times = new short[reachable];
|
|
short[] bestTimes = result.bestTimes() != null ? new short[reachable] : null;
|
|
String[] journeys = result.journeys() != null ? new String[reachable] : null;
|
|
int j = 0;
|
|
for (int i = 0; i < result.times().length; i++) {
|
|
if (result.times()[i] >= 0) {
|
|
codes[j] = postcodes[result.originalIndices()[i]];
|
|
times[j] = result.times()[i];
|
|
if (bestTimes != null) bestTimes[j] = result.bestTimes()[i];
|
|
if (journeys != null) journeys[j] = result.journeys()[i]; // may be null for some postcodes
|
|
j++;
|
|
}
|
|
}
|
|
|
|
if (bestTimes != null) {
|
|
Parquet.writeTransitTravelTimes(conn, outPath, codes, times, bestTimes, journeys);
|
|
} else {
|
|
Parquet.writeTravelTimes(conn, outPath, codes, times);
|
|
}
|
|
return;
|
|
} catch (Exception e) {
|
|
lastError = e;
|
|
if (attempt < MAX_RETRIES) {
|
|
System.err.printf("%n [RETRY %d/%d] %s: %s%n",
|
|
attempt + 1, MAX_RETRIES, name, e.getMessage());
|
|
} else {
|
|
System.err.printf("%n [FAIL TRACE] %s:%n", name);
|
|
e.printStackTrace(System.err);
|
|
}
|
|
}
|
|
}
|
|
throw lastError;
|
|
}
|
|
|
|
/** Build a filename from index + place name (index prefix prevents collisions after sanitization). */
|
|
private static String originFilename(int index, String name) {
|
|
return String.format("%06d-%s.parquet", index, slugFromName(name));
|
|
}
|
|
|
|
/** Slugify a place name: lowercase, strip non-alphanumeric (except spaces/hyphens), collapse whitespace. */
|
|
private static String slugFromName(String name) {
|
|
return name.toLowerCase()
|
|
.replaceAll("[^a-z0-9 -]", "")
|
|
.replaceAll("\\s+", "-");
|
|
}
|
|
|
|
/**
|
|
* Scan a mode directory for existing non-empty parquet files, returning the set of slugs
|
|
* (filenames with numeric prefix stripped). This allows resume to work across places.parquet
|
|
* rebuilds where indices change but slugs stay the same.
|
|
*/
|
|
private static Set<String> scanExistingSlugs(Path modeDir) throws IOException {
|
|
Set<String> slugs = new HashSet<>();
|
|
if (!Files.isDirectory(modeDir)) return slugs;
|
|
try (DirectoryStream<Path> stream = Files.newDirectoryStream(modeDir, "*.parquet")) {
|
|
for (Path p : stream) {
|
|
if (Files.size(p) > 0) {
|
|
String stem = p.getFileName().toString().replace(".parquet", "");
|
|
int dash = stem.indexOf('-');
|
|
if (dash > 0 && stem.substring(0, dash).chars().allMatch(Character::isDigit)) {
|
|
slugs.add(stem.substring(dash + 1));
|
|
} else {
|
|
slugs.add(stem);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return slugs;
|
|
}
|
|
|
|
private static String requiredArg(String[] args, String name) {
|
|
for (int i = 0; i < args.length - 1; i++) {
|
|
if (args[i].equals(name)) return args[i + 1];
|
|
}
|
|
System.err.println("Missing required argument: " + name);
|
|
System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N] [--demo]");
|
|
System.exit(1);
|
|
return null; // unreachable
|
|
}
|
|
|
|
private static boolean hasFlag(String[] args, String name) {
|
|
for (String arg : args) {
|
|
if (arg.equals(name)) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
private static String optionalArg(String[] args, String name, String defaultValue) {
|
|
for (int i = 0; i < args.length - 1; i++) {
|
|
if (args[i].equals(name)) return args[i + 1];
|
|
}
|
|
return defaultValue;
|
|
}
|
|
|
|
/**
|
|
* Filter place indices to those near at least one England postcode.
|
|
* Uses a 0.1° grid (~11km cells) built from postcode locations — a place is kept
|
|
* if its grid cell or any adjacent cell contains an England postcode.
|
|
*/
|
|
private static Set<Integer> filterEnglandPlaces(
|
|
double[] placeLats, double[] placeLons,
|
|
double[] postcodeLats, double[] postcodeLons) {
|
|
// Build grid of cells that contain at least one England postcode
|
|
Set<Long> postcodeCells = new HashSet<>();
|
|
for (int i = 0; i < postcodeLats.length; i++) {
|
|
postcodeCells.add(gridCell(postcodeLats[i], postcodeLons[i]));
|
|
}
|
|
|
|
// Keep places where the place's cell or any adjacent cell has postcodes
|
|
Set<Integer> keep = new HashSet<>();
|
|
for (int i = 0; i < placeLats.length; i++) {
|
|
int gLat = (int) Math.floor(placeLats[i] * 10);
|
|
int gLon = (int) Math.floor(placeLons[i] * 10);
|
|
outer:
|
|
for (int dLat = -1; dLat <= 1; dLat++) {
|
|
for (int dLon = -1; dLon <= 1; dLon++) {
|
|
if (postcodeCells.contains(packGrid(gLat + dLat, gLon + dLon))) {
|
|
keep.add(i);
|
|
break outer;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return keep;
|
|
}
|
|
|
|
private static long gridCell(double lat, double lon) {
|
|
return packGrid((int) Math.floor(lat * 10), (int) Math.floor(lon * 10));
|
|
}
|
|
|
|
private static long packGrid(int gLat, int gLon) {
|
|
return ((long) gLat << 32) | (gLon & 0xFFFFFFFFL);
|
|
}
|
|
|
|
private static String requiredEnv(String name) {
|
|
String val = System.getenv(name);
|
|
if (val == null) {
|
|
System.err.println("Missing required environment variable: " + name);
|
|
System.exit(1);
|
|
}
|
|
return val;
|
|
}
|
|
}
|