better transit times
This commit is contained in:
parent
974f005549
commit
205302dbb8
22 changed files with 247 additions and 69 deletions
|
|
@ -23,11 +23,16 @@ public class Parquet {
|
|||
catch (ClassNotFoundException e) { throw new RuntimeException(e); }
|
||||
}
|
||||
|
||||
/** Escape a file path for safe interpolation into DuckDB SQL (double single quotes). */
|
||||
private static String escapePath(String path) {
|
||||
return path.replace("'", "''");
|
||||
}
|
||||
|
||||
/** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */
|
||||
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
|
||||
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
|
||||
+ escapePath(parquetPath) + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
|
||||
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
|
||||
|
|
@ -56,7 +61,7 @@ public class Parquet {
|
|||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
|
||||
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
|
||||
+ "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1");
|
||||
+ "FROM read_parquet('" + escapePath(parquetPath) + "')) WHERE rn = 1");
|
||||
copyToParquet(stmt, "SELECT * FROM places", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
|
||||
|
|
@ -97,7 +102,30 @@ public class Parquet {
|
|||
}
|
||||
}
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
}
|
||||
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||
}
|
||||
|
||||
/** Write transit travel times with both median and best-case columns. */
|
||||
static void writeTransitTravelTimes(DuckDBConnection conn, Path outPath,
|
||||
String[] postcodes, short[] times, short[] bestTimes) throws Exception {
|
||||
Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("DROP TABLE IF EXISTS t");
|
||||
stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT, best_minutes SMALLINT)");
|
||||
}
|
||||
try (DuckDBAppender appender = conn.createAppender("main", "t")) {
|
||||
for (int i = 0; i < postcodes.length; i++) {
|
||||
appender.beginRow();
|
||||
appender.append(postcodes[i]);
|
||||
appender.append(times[i]);
|
||||
appender.append(bestTimes[i]);
|
||||
appender.endRow();
|
||||
}
|
||||
}
|
||||
try (Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
}
|
||||
Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||
}
|
||||
|
|
@ -108,7 +136,7 @@ public class Parquet {
|
|||
}
|
||||
|
||||
private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
|
||||
stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath()
|
||||
stmt.execute("COPY (" + query + ") TO '" + escapePath(outPath.toAbsolutePath().toString())
|
||||
+ "' (FORMAT PARQUET, COMPRESSION ZSTD)");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue