package propertymap; import org.duckdb.DuckDBAppender; import org.duckdb.DuckDBConnection; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; import java.util.ArrayList; import java.util.List; /** DuckDB-based parquet I/O. */ public class Parquet { record Postcodes(String[] codes, double[] lats, double[] lons) {} static { try { Class.forName("org.duckdb.DuckDBDriver"); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } /** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */ static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception { try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) { stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('" + parquetPath + "') WHERE ctry = 'E92000001'"); copyToParquet(stmt, "SELECT * FROM postcodes", refOut); try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) { rs.next(); int n = rs.getInt(1); String[] codes = new String[n]; double[] lats = new double[n]; double[] lons = new double[n]; try (ResultSet data = stmt.executeQuery("SELECT pcds, lat, \"long\" FROM postcodes")) { int i = 0; while (data.next()) { codes[i] = data.getString(1); lats[i] = data.getDouble(2); lons[i] = data.getDouble(3); i++; } } return new Postcodes(codes, lats, lons); } } } /** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */ static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception { try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) { stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM (" + "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn " + "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1"); copyToParquet(stmt, "SELECT * FROM places", refOut); try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) { rs.next(); int n = rs.getInt(1); // Return as [lats, lons] flat arrays double[] lats = new double[n]; double[] lons = new double[n]; try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) { int i = 0; while (data.next()) { lats[i] = data.getDouble(1); lons[i] = data.getDouble(2); i++; } } return new double[][]{lats, lons}; } } } /** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */ static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times) throws Exception { Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp"); try (Statement stmt = conn.createStatement()) { stmt.execute("DROP TABLE IF EXISTS t"); stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT)"); } try (DuckDBAppender appender = conn.createAppender("main", "t")) { for (int i = 0; i < postcodes.length; i++) { appender.beginRow(); appender.append(postcodes[i]); appender.append(times[i]); appender.endRow(); } } try (Statement stmt = conn.createStatement()) { stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)"); } Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); } /** Create a new in-memory DuckDB connection (for use as a per-thread reusable connection). */ static DuckDBConnection connect() throws Exception { return (DuckDBConnection) DriverManager.getConnection("jdbc:duckdb:"); } private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception { stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)"); } }