good changes

This commit is contained in:
Andras Schmelczer 2026-03-25 08:04:48 +00:00
parent 160283f1a1
commit c997ea46a5
26 changed files with 991 additions and 288 deletions

View file

@ -3,6 +3,8 @@ package propertymap;
import com.conveyal.r5.transit.TransportNetwork;
import org.duckdb.DuckDBConnection;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@ -132,11 +134,15 @@ public class App {
Path modeDir = outDir.resolve(mode);
Files.createDirectories(modeDir);
// Scan existing slugs once (O(directory)) instead of per-origin stat calls.
// This matches by slug regardless of numeric prefix, so re-indexed places.parquet
// won't cause duplicate computation.
Set<String> existingSlugs = skipCompleted ? scanExistingSlugs(modeDir) : Set.of();
List<Integer> remaining = new ArrayList<>();
for (int idx : originIndices) {
if (skipCompleted) {
Path f = modeDir.resolve(originFilename(idx, originNames[idx]));
if (Files.exists(f) && Files.size(f) > 0) continue;
if (skipCompleted && existingSlugs.contains(slugFromName(originNames[idx]))) {
continue;
}
remaining.add(idx);
}
@ -255,10 +261,38 @@ public class App {
/** Build a filename from index + place name (index prefix prevents collisions after sanitization). */
private static String originFilename(int index, String name) {
String safe = name.toLowerCase()
return String.format("%06d-%s.parquet", index, slugFromName(name));
}
/** Slugify a place name: lowercase, strip non-alphanumeric (except spaces/hyphens), collapse whitespace. */
private static String slugFromName(String name) {
return name.toLowerCase()
.replaceAll("[^a-z0-9 -]", "")
.replaceAll("\\s+", "-");
return String.format("%06d-%s.parquet", index, safe);
}
/**
 * Scans a mode directory for completed outputs and returns the set of their slugs.
 *
 * <p>Only non-empty regular files matching {@code *.parquet} are considered. For each one the
 * trailing {@code .parquet} extension is dropped; if the remaining stem starts with an all-digit
 * prefix followed by {@code '-'}, that prefix and the dash are dropped as well, otherwise the
 * whole stem is used as the slug. Keying on the slug rather than the full filename allows resume
 * to work across places.parquet rebuilds where indices change but slugs stay the same.
 *
 * @param modeDir directory to scan; if it is not an existing directory an empty set is returned
 * @return a mutable set of slugs found in {@code modeDir}
 * @throws IOException if the directory cannot be listed or a file's size cannot be read
 */
private static Set<String> scanExistingSlugs(Path modeDir) throws IOException {
    final String extension = ".parquet";
    Set<String> slugs = new HashSet<>();
    if (!Files.isDirectory(modeDir)) {
        return slugs;
    }
    try (DirectoryStream<Path> stream = Files.newDirectoryStream(modeDir, "*" + extension)) {
        for (Path p : stream) {
            // A directory named "*.parquet" reports a non-zero size on many filesystems;
            // only regular files with content count as completed outputs.
            if (!Files.isRegularFile(p) || Files.size(p) == 0) {
                continue;
            }
            String fileName = p.getFileName().toString();
            // Strip only the trailing extension, not every occurrence of ".parquet" in the name.
            String stem = fileName.endsWith(extension)
                    ? fileName.substring(0, fileName.length() - extension.length())
                    : fileName;
            int dash = stem.indexOf('-');
            boolean hasNumericPrefix =
                    dash > 0 && stem.substring(0, dash).chars().allMatch(Character::isDigit);
            slugs.add(hasNumericPrefix ? stem.substring(dash + 1) : stem);
        }
    } catch (java.nio.file.DirectoryIteratorException e) {
        // Directory iteration reports I/O failures through this unchecked wrapper;
        // surface the underlying IOException so callers see the declared exception type.
        throw e.getCause();
    }
    return slugs;
}
private static String requiredArg(String[] args, String name) {