better transit times

This commit is contained in:
Andras Schmelczer 2026-02-22 11:13:39 +00:00
parent 974f005549
commit 205302dbb8
22 changed files with 247 additions and 69 deletions

View file

@ -23,11 +23,16 @@ public class Parquet {
catch (ClassNotFoundException e) { throw new RuntimeException(e); }
}
/**
 * Makes a file path safe to embed inside a single-quoted DuckDB SQL string literal
 * by doubling every single quote it contains; all other characters pass through unchanged.
 */
private static String escapePath(String path) {
    StringBuilder escaped = new StringBuilder(path.length() + 2);
    for (int i = 0; i < path.length(); i++) {
        char c = path.charAt(i);
        if (c == '\'') {
            // A literal quote inside a SQL string is written as two quotes.
            escaped.append("''");
        } else {
            escaped.append(c);
        }
    }
    return escaped.toString();
}
/** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
+ escapePath(parquetPath) + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
@ -56,7 +61,7 @@ public class Parquet {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
+ "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1");
+ "FROM read_parquet('" + escapePath(parquetPath) + "')) WHERE rn = 1");
copyToParquet(stmt, "SELECT * FROM places", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
@ -97,7 +102,30 @@ public class Parquet {
}
}
try (Statement stmt = conn.createStatement()) {
stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
}
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
}
/**
 * Write transit travel times with both median and best-case columns.
 *
 * <p>Recreates the scratch table {@code t} on {@code conn}, appends one row per postcode,
 * exports the table as ZSTD-compressed Parquet to a sibling {@code <name>.tmp} file and then
 * moves that file over {@code outPath}, so readers never observe a half-written output.
 *
 * @param conn      open DuckDB connection; any existing table {@code t} on it is dropped
 * @param outPath   final location of the Parquet file
 * @param postcodes postcode strings, written to column {@code pcds}
 * @param times     value written to {@code travel_minutes} for {@code postcodes[i]}
 * @param bestTimes value written to {@code best_minutes} for {@code postcodes[i]}
 * @throws IllegalArgumentException if {@code times} or {@code bestTimes} has fewer elements
 *                                  than {@code postcodes}
 * @throws Exception                if any DuckDB statement or the final file move fails
 */
static void writeTransitTravelTimes(DuckDBConnection conn, Path outPath,
        String[] postcodes, short[] times, short[] bestTimes) throws Exception {
    // Validate up front: failing halfway through the append would leave table t dropped
    // and partially refilled.
    if (times.length < postcodes.length || bestTimes.length < postcodes.length) {
        throw new IllegalArgumentException("times (" + times.length + ") and bestTimes ("
                + bestTimes.length + ") must each cover all " + postcodes.length + " postcodes");
    }
    Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
    try (Statement stmt = conn.createStatement()) {
        stmt.execute("DROP TABLE IF EXISTS t");
        stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT, best_minutes SMALLINT)");
    }
    try (DuckDBAppender appender = conn.createAppender("main", "t")) {
        for (int i = 0; i < postcodes.length; i++) {
            appender.beginRow();
            appender.append(postcodes[i]);
            appender.append(times[i]);
            appender.append(bestTimes[i]);
            appender.endRow();
        }
    }
    try {
        try (Statement stmt = conn.createStatement()) {
            stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
        }
        Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
    } catch (Exception e) {
        // Do not leave a stale .tmp next to the output when the export or the move fails.
        try {
            Files.deleteIfExists(tmp);
        } catch (Exception cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }
}
@ -108,7 +136,7 @@ public class Parquet {
}
/**
 * Runs a DuckDB {@code COPY (<query>) TO '<path>'} statement that writes the result of
 * {@code query} to a ZSTD-compressed Parquet file at the absolute form of {@code outPath}.
 *
 * @param stmt    statement used to execute the COPY
 * @param query   SQL query whose result set is exported; concatenated into the statement verbatim
 * @param outPath destination file; converted to an absolute path before use
 * @throws Exception if executing the COPY statement fails
 */
private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
// NOTE(review): the two consecutive stmt.execute lines below look like the before/after pair of a
// diff; presumably only the escapePath variant is meant to remain — confirm against the real file.
stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath()
stmt.execute("COPY (" + query + ") TO '" + escapePath(outPath.toAbsolutePath().toString())
+ "' (FORMAT PARQUET, COMPRESSION ZSTD)");
}
}