Rerun data pipelines

This commit is contained in:
Andras Schmelczer 2026-05-10 14:49:53 +01:00
parent 4c95815dc8
commit fc10381692
27 changed files with 2143 additions and 215 deletions

View file

@ -32,7 +32,7 @@ public class Parquet {
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ escapePath(parquetPath) + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
+ escapePath(parquetPath) + "') WHERE ctry25cd = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {