Rerun data pipelines

This commit is contained in:
Andras Schmelczer 2026-05-10 14:49:53 +01:00
parent 4c95815dc8
commit fc10381692
27 changed files with 2143 additions and 215 deletions

View file

@ -111,20 +111,24 @@ fi
# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
# Copy source data to a writable build dir to avoid polluting the originals.
mkdir -p "$NETWORK_DIR"
OSM_PBF="property-data/england-latest.osm.pbf"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$TRANSIT_SRC"
NETWORK_DATA_DIR="$NETWORK_DIR/build"
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
BUILD_DIR="$NETWORK_DIR/build"
echo "--- No cached network — copying transit data to build dir ---"
mkdir -p "$BUILD_DIR"
if ! cp "$TRANSIT_SRC"/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .osm.pbf files found in $TRANSIT_SRC/raw/"
if [ ! -f "$OSM_PBF" ]; then
echo "Error: OSM PBF not found at $OSM_PBF"
echo "Download it from https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf"
exit 1
fi
cp "$OSM_PBF" "$BUILD_DIR/"
if ! cp "$TRANSIT_SRC"/*.zip "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no .zip files found in $TRANSIT_SRC/"
echo "Warning: no GTFS .zip files found in $TRANSIT_SRC/ — transit routing would be unavailable"
exit 1
fi
NETWORK_DATA_DIR="$BUILD_DIR"
fi
# --- Step 5: Run batch ---

View file

@ -32,7 +32,7 @@ public class Parquet {
static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+ escapePath(parquetPath) + "') WHERE ctry = 'E92000001' AND doterm IS NULL");
+ escapePath(parquetPath) + "') WHERE ctry25cd = 'E92000001' AND doterm IS NULL");
copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {