seems fine
This commit is contained in:
parent
48983e3b4b
commit
7a1696541f
37 changed files with 4999 additions and 1242 deletions
|
|
@ -59,9 +59,15 @@ public class Parquet {
|
|||
/** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */
|
||||
static Places loadPlaces(String parquetPath, Path refOut) throws Exception {
|
||||
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
|
||||
stmt.execute("CREATE TABLE all_places AS SELECT * FROM read_parquet('"
|
||||
+ escapePath(parquetPath) + "')");
|
||||
boolean hasTravelDestination = tableHasColumn(stmt, "all_places", "travel_destination");
|
||||
String source = hasTravelDestination
|
||||
? "(SELECT * FROM all_places WHERE COALESCE(travel_destination, true))"
|
||||
: "all_places";
|
||||
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
|
||||
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
|
||||
+ "FROM read_parquet('" + escapePath(parquetPath) + "')) WHERE rn = 1");
|
||||
+ "FROM " + source + " AS p) WHERE rn = 1");
|
||||
copyToParquet(stmt, "SELECT * FROM places", refOut);
|
||||
|
||||
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
|
||||
|
|
@ -85,6 +91,17 @@ public class Parquet {
|
|||
}
|
||||
}
|
||||
|
||||
private static boolean tableHasColumn(Statement stmt, String tableName, String columnName)
|
||||
throws Exception {
|
||||
try (ResultSet rs = stmt.executeQuery(
|
||||
"SELECT COUNT(*) FROM information_schema.columns "
|
||||
+ "WHERE table_name = '" + tableName + "' "
|
||||
+ "AND column_name = '" + columnName + "'")) {
|
||||
rs.next();
|
||||
return rs.getInt(1) > 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */
|
||||
static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times)
|
||||
throws Exception {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue