package propertymap; import org.duckdb.DuckDBAppender; import org.duckdb.DuckDBConnection; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; import java.util.ArrayList; import java.util.List; /** DuckDB-based parquet I/O. */ public class Parquet { record Postcodes(String[] codes, double[] lats, double[] lons) {} record Places(String[] names, double[] lats, double[] lons) {} static { try { Class.forName("org.duckdb.DuckDBDriver"); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } } /** Escape a file path for safe interpolation into DuckDB SQL (double single quotes). */ private static String escapePath(String path) { return path.replace("'", "''"); } /** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */ static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception { try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) { stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('" + escapePath(parquetPath) + "') WHERE ctry25cd = 'E92000001' AND doterm IS NULL"); copyToParquet(stmt, "SELECT * FROM postcodes", refOut); try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) { rs.next(); int n = rs.getInt(1); String[] codes = new String[n]; double[] lats = new double[n]; double[] lons = new double[n]; try (ResultSet data = stmt.executeQuery("SELECT pcds, lat, \"long\" FROM postcodes")) { int i = 0; while (data.next()) { codes[i] = data.getString(1); lats[i] = data.getDouble(2); lons[i] = data.getDouble(3); i++; } } return new Postcodes(codes, lats, lons); } } } /** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */ static Places loadPlaces(String parquetPath, Path refOut) throws Exception { try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) { stmt.execute("CREATE TABLE all_places AS SELECT * FROM read_parquet('" + escapePath(parquetPath) + "')"); boolean hasTravelDestination = tableHasColumn(stmt, "all_places", "travel_destination"); String source = hasTravelDestination ? "(SELECT * FROM all_places WHERE COALESCE(travel_destination, true))" : "all_places"; stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM (" + "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn " + "FROM " + source + " AS p) WHERE rn = 1"); copyToParquet(stmt, "SELECT * FROM places", refOut); try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) { rs.next(); int n = rs.getInt(1); String[] names = new String[n]; double[] lats = new double[n]; double[] lons = new double[n]; try (ResultSet data = stmt.executeQuery("SELECT name, lat, lon FROM places")) { int i = 0; while (data.next()) { names[i] = data.getString(1); lats[i] = data.getDouble(2); lons[i] = data.getDouble(3); i++; } } return new Places(names, lats, lons); } } } private static boolean tableHasColumn(Statement stmt, String tableName, String columnName) throws Exception { try (ResultSet rs = stmt.executeQuery( "SELECT COUNT(*) FROM information_schema.columns " + "WHERE table_name = '" + tableName + "' " + "AND column_name = '" + columnName + "'")) { rs.next(); return rs.getInt(1) > 0; } } /** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */ static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times) throws Exception { Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp"); try (Statement stmt = conn.createStatement()) { stmt.execute("DROP TABLE IF EXISTS t"); stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT)"); } try (DuckDBAppender appender = conn.createAppender("main", "t")) { for (int i = 0; i < postcodes.length; i++) { appender.beginRow(); appender.append(postcodes[i]); appender.append(times[i]); appender.endRow(); } } try (Statement stmt = conn.createStatement()) { stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)"); } Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); } /** * Write transit travel times with median, best-case, and optional journey columns. * @param journeys may be null (no journey column written) or non-null (journey VARCHAR added, individual elements may be null) */ static void writeTransitTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times, short[] bestTimes, String[] journeys) throws Exception { Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp"); boolean hasJourneys = journeys != null; try (Statement stmt = conn.createStatement()) { stmt.execute("DROP TABLE IF EXISTS t"); stmt.execute(hasJourneys ? "CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT, best_minutes SMALLINT, journey VARCHAR)" : "CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT, best_minutes SMALLINT)"); } try (DuckDBAppender appender = conn.createAppender("main", "t")) { for (int i = 0; i < postcodes.length; i++) { appender.beginRow(); appender.append(postcodes[i]); appender.append(times[i]); appender.append(bestTimes[i]); if (hasJourneys) appender.append(journeys[i]); // null-safe: DuckDB appends SQL NULL appender.endRow(); } } try (Statement stmt = conn.createStatement()) { stmt.execute("COPY t TO '" + escapePath(tmp.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)"); } Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); } /** Create a new in-memory DuckDB connection (for use as a per-thread reusable connection). */ static DuckDBConnection connect() throws Exception { return (DuckDBConnection) DriverManager.getConnection("jdbc:duckdb:"); } private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception { stmt.execute("COPY (" + query + ") TO '" + escapePath(outPath.toAbsolutePath().toString()) + "' (FORMAT PARQUET, COMPRESSION ZSTD)"); } }