seems fine

This commit is contained in:
Andras Schmelczer 2026-05-05 22:29:28 +01:00
parent 48983e3b4b
commit 7a1696541f
37 changed files with 4999 additions and 1242 deletions

View file

@@ -59,9 +59,15 @@ public class Parquet {
/** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */
static Places loadPlaces(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE all_places AS SELECT * FROM read_parquet('"
+ escapePath(parquetPath) + "')");
boolean hasTravelDestination = tableHasColumn(stmt, "all_places", "travel_destination");
String source = hasTravelDestination
? "(SELECT * FROM all_places WHERE COALESCE(travel_destination, true))"
: "all_places";
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
+ "FROM read_parquet('" + escapePath(parquetPath) + "')) WHERE rn = 1");
+ "FROM " + source + " AS p) WHERE rn = 1");
copyToParquet(stmt, "SELECT * FROM places", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
@@ -85,6 +91,17 @@ public class Parquet {
}
}
/**
 * Reports whether {@code information_schema.columns} lists a column named {@code columnName}
 * for a table named {@code tableName}.
 *
 * <p>Both names are spliced into the SQL text verbatim (no quoting or escaping is applied),
 * so callers should pass trusted identifiers only.
 *
 * @param stmt statement used to run the lookup query
 * @param tableName table name to match against {@code table_name}
 * @param columnName column name to match against {@code column_name}
 * @return {@code true} when at least one matching row is counted
 * @throws Exception if executing the query or reading its result fails
 */
private static boolean tableHasColumn(Statement stmt, String tableName, String columnName)
        throws Exception {
    String lookupSql = "SELECT COUNT(*) FROM information_schema.columns "
            + "WHERE table_name = '" + tableName + "' "
            + "AND column_name = '" + columnName + "'";
    try (ResultSet counted = stmt.executeQuery(lookupSql)) {
        // Advance onto the single row produced by the COUNT(*) aggregate.
        counted.next();
        int matchingColumns = counted.getInt(1);
        return matchingColumns > 0;
    }
}
/** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */
static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times)
throws Exception {