This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@@ -16,6 +16,7 @@ import java.util.List;
public class Parquet {
/**
 * Column-oriented holder for postcode data: one array of codes and two
 * arrays of coordinates.
 *
 * <p>The arrays are stored by reference (no defensive copy), so callers
 * share the same backing storage. Presumably the three arrays are parallel
 * (index {@code i} describes one postcode) -- confirm against the loader.
 *
 * @param codes postcode strings
 * @param lats  latitudes
 * @param lons  longitudes
 */
record Postcodes(
        String[] codes,
        double[] lats,
        double[] lons
) {
}
/**
 * Column-oriented holder for place data: names plus flat latitude and
 * longitude arrays.
 *
 * <p>The arrays are stored by reference (no defensive copy), so callers
 * share the same backing storage.
 *
 * @param names place names
 * @param lats  latitudes
 * @param lons  longitudes
 */
record Places(
        String[] names,
        double[] lats,
        double[] lons
) {
}
static {
try { Class.forName("org.duckdb.DuckDBDriver"); }
@@ -26,7 +27,7 @@ public class Parquet {
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ parquetPath + "') WHERE ctry = 'E92000001'");
+ parquetPath + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
@@ -50,8 +51,8 @@ public class Parquet {
}
}
/** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */
static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
/** Load places deduplicated by lat/lon, write reference parquet, return names + flat lat/lon arrays. */
static Places loadPlaces(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+ "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
@@ -61,19 +62,20 @@ public class Parquet {
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
rs.next();
int n = rs.getInt(1);
// Return as [lats, lons] flat arrays
String[] names = new String[n];
double[] lats = new double[n];
double[] lons = new double[n];
try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
try (ResultSet data = stmt.executeQuery("SELECT name, lat, lon FROM places")) {
int i = 0;
while (data.next()) {
lats[i] = data.getDouble(1);
lons[i] = data.getDouble(2);
names[i] = data.getString(1);
lats[i] = data.getDouble(2);
lons[i] = data.getDouble(3);
i++;
}
}
return new double[][]{lats, lons};
return new Places(names, lats, lons);
}
}
}