Add R5 validation

This commit is contained in:
Andras Schmelczer 2026-05-12 06:44:15 +01:00
parent f2a2651b8a
commit b580c51b6d
4 changed files with 53 additions and 10 deletions

View file

@ -1,9 +1,5 @@
# Data pipeline — download sources and build postcode.parquet + properties.parquet
#
# Usage:
# make -f Makefile.data prepare # Build all parquets (+ all deps)
# make -f Makefile.data tiles # Download UK map tiles
# make -f Makefile.data download-map-assets
#
# Or include from the main Makefile and use targets directly.
@ -47,6 +43,7 @@ UPRN_LOOKUP := $(DATA_DIR)/uprn_lookup.parquet
PC_BOUNDARIES := $(DATA_DIR)/postcode_boundaries
TRANSIT_DIR := $(DATA_DIR)/transit
TRANSIT_STAMP := $(TRANSIT_DIR)/.done
# Cached R5 network file; generate-travel-times deletes it when its inputs are newer.
# NOTE(review): presumably the same network.dat that r5-java/run.sh looks for under
# its NETWORK_DIR — confirm the two paths agree.
R5_NETWORK_CACHE := $(DATA_DIR)/r5-network/network.dat
GREENSPACE := $(DATA_DIR)/greenspace_water.parquet
OS_GREENSPACE := $(DATA_DIR)/os_greenspace.parquet
PBF := $(DATA_DIR)/england-latest.osm.pbf
@ -77,7 +74,7 @@ PMTILES_VERSION := 1.22.3
transform-school-proximity \
generate-postcode-boundaries generate-travel-times
prepare: $(PRICES_STAMP) download-places tiles generate-postcode-boundaries download-map-assets
prepare: $(PRICES_STAMP) download-places tiles generate-postcode-boundaries download-map-assets generate-travel-times
merge: $(MERGE_STAMP)
tiles: $(TILES)
download-arcgis: $(ARCGIS)
@ -119,7 +116,11 @@ generate-postcode-boundaries: $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP)
--oa-boundaries $(OA_BOUNDARIES) \
--inspire $(INSPIRE_DIR) \
--output $(PC_BOUNDARIES)
generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) $(TRANSIT_STAMP)
# Run the R5 travel-time batch via ./r5-java/run.sh.
# Before launching, the recipe compares file timestamps with the shell's -nt test:
# if the cached network exists and either the OSM extract ($(PBF)) or the transit
# stamp ($(TRANSIT_STAMP)) is newer than it, the cache file is removed first.
# NOTE(review): presumably run.sh then rebuilds the network because the cache is
# missing — confirm that run.sh uses the same path as $(R5_NETWORK_CACHE).
generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
@if [ -f "$(R5_NETWORK_CACHE)" ] && { [ "$(PBF)" -nt "$(R5_NETWORK_CACHE)" ] || [ "$(TRANSIT_STAMP)" -nt "$(R5_NETWORK_CACHE)" ]; }; then \
echo "R5 inputs are newer than $(R5_NETWORK_CACHE); deleting stale cache"; \
rm -f "$(R5_NETWORK_CACHE)"; \
fi
./r5-java/run.sh
# ── Downloads ─────────────────────────────────────────────────────────────────

View file

@ -111,6 +111,19 @@ mkdir -p "$NETWORK_DIR"
OSM_PBF="property-data/england-latest.osm.pbf"
TRANSIT_SRC="property-data/transit"
NETWORK_DATA_DIR="$NETWORK_DIR/build"
# Collect the GTFS feeds up front so that missing transit data aborts the run
# before the cached-network check below, even when a network.dat already exists.
# nullglob makes an unmatched pattern expand to an empty array rather than the
# literal "*.zip" string; it is switched off again right after the expansion.
shopt -s nullglob
GTFS_FILES=("$TRANSIT_SRC"/*.zip)
shopt -u nullglob

if [ ${#GTFS_FILES[@]} -eq 0 ]; then
    # Diagnostics go to stderr so they are never mixed into captured/piped stdout.
    {
        echo "Error: no GTFS .zip files found in $TRANSIT_SRC/"
        echo "Run: make -f Makefile.data download-transit-network"
        if [ -f "$NETWORK_DIR/network.dat" ]; then
            echo "A cached R5 network exists at $NETWORK_DIR/network.dat, but it may be stale or OSM-only."
            echo "Delete it after restoring transit data so R5 rebuilds with GTFS."
        fi
    } >&2
    exit 1
fi
if [ ! -f "$NETWORK_DIR/network.dat" ]; then
BUILD_DIR="$NETWORK_DIR/build"
@ -122,10 +135,7 @@ if [ ! -f "$NETWORK_DIR/network.dat" ]; then
exit 1
fi
cp "$OSM_PBF" "$BUILD_DIR/"
if ! cp "$TRANSIT_SRC"/*.zip "$BUILD_DIR/" 2>/dev/null; then
echo "Warning: no GTFS .zip files found in $TRANSIT_SRC/ — transit routing would be unavailable"
exit 1
fi
cp "${GTFS_FILES[@]}" "$BUILD_DIR/"
fi
# --- Step 5: Run batch ---

View file

@ -54,6 +54,7 @@ public class App {
LocalDate today = LocalDate.now();
TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR"));
Router.validateTransitServices(network, today);
System.err.println("Loading postcodes (England only)...");
Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(

View file

@ -22,6 +22,7 @@ import java.io.File;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.List;
@ -78,12 +79,42 @@ public class Router {
System.err.println(" Cached to " + cacheFile);
}
validateTransitNetwork(network, cacheFile, dataDir);
System.err.println(" Building distance tables...");
network.transitLayer.buildDistanceTables(null);
System.err.println(" Network ready");
return network;
}
/**
 * Checks that the loaded network actually carries transit data and reports its size.
 *
 * <p>Counts stops, routes, trip patterns and services on the transit layer, treating a
 * missing layer or a missing collection as zero. On success the four counts are printed
 * to stderr.
 *
 * @param network   network whose transit layer is inspected
 * @param cacheFile cache file named in the error message
 * @param dataDir   data directory named in the error message
 * @throws IllegalStateException if the stop, route or pattern count is zero
 */
private static void validateTransitNetwork(TransportNetwork network, File cacheFile, String dataDir) {
    TransitLayer layer = network.transitLayer;

    int stopCount = 0;
    int routeCount = 0;
    int patternCount = 0;
    int serviceCount = 0;
    if (layer != null) {
        stopCount = layer.getStopCount();
        if (layer.routes != null) {
            routeCount = layer.routes.size();
        }
        if (layer.tripPatterns != null) {
            patternCount = layer.tripPatterns.size();
        }
        if (layer.services != null) {
            serviceCount = layer.services.size();
        }
    }

    // The service count is informational only; it does not affect the usability check.
    boolean hasTransit = stopCount != 0 && routeCount != 0 && patternCount != 0;
    if (!hasTransit) {
        String cachePath = cacheFile.getPath();
        String message = String.format(
                "R5 network has no usable transit data (stops=%d, routes=%d, patterns=%d). "
                        + "The cache at %s was likely built without GTFS. Ensure %s contains GTFS .zip files, "
                        + "then delete %s and rerun.",
                stopCount, routeCount, patternCount, cachePath, dataDir, cachePath);
        throw new IllegalStateException(message);
    }

    System.err.printf(" Transit: %,d stops, %,d routes, %,d patterns, %,d services%n",
            stopCount, routeCount, patternCount, serviceCount);
}
/**
 * Verifies that at least one transit service in the network is active on the given date.
 *
 * <p>On success the number of active services is printed to stderr.
 *
 * @param network network whose transit layer is queried
 * @param date    service date to check
 * @throws IllegalStateException if the network has no transit layer, or if no service
 *                               is active on {@code date}
 */
static void validateTransitServices(TransportNetwork network, LocalDate date) {
    TransitLayer transitLayer = network.transitLayer;
    if (transitLayer == null) {
        // Fail with a descriptive error instead of a bare NullPointerException when this
        // is called on a network that was never validated for transit data.
        throw new IllegalStateException(
                "R5 network has no transit layer; cannot check active services on " + date + ".");
    }

    // Count once; the value is used for both the check and the log line.
    BitSet activeServices = transitLayer.getActiveServicesForDate(date);
    int activeCount = activeServices.cardinality();
    if (activeCount == 0) {
        throw new IllegalStateException("R5 network has transit data, but no active services on "
                + date + ". Rebuild property-data/transit from current feeds or choose a date covered by GTFS.");
    }
    System.err.printf(" Active transit services on %s: %,d%n", date, activeCount);
}
/**
* Filter destinations by distance, build chunks, compute travel times for one origin.
* Returns only the filtered subset indices and their travel times.