good changes
This commit is contained in:
parent
160283f1a1
commit
c997ea46a5
26 changed files with 991 additions and 288 deletions
|
|
@ -3,6 +3,8 @@ package propertymap;
|
|||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import org.duckdb.DuckDBConnection;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
|
@ -132,11 +134,15 @@ public class App {
|
|||
Path modeDir = outDir.resolve(mode);
|
||||
Files.createDirectories(modeDir);
|
||||
|
||||
// Scan existing slugs once (O(directory)) instead of per-origin stat calls.
|
||||
// This matches by slug regardless of numeric prefix, so re-indexed places.parquet
|
||||
// won't cause duplicate computation.
|
||||
Set<String> existingSlugs = skipCompleted ? scanExistingSlugs(modeDir) : Set.of();
|
||||
|
||||
List<Integer> remaining = new ArrayList<>();
|
||||
for (int idx : originIndices) {
|
||||
if (skipCompleted) {
|
||||
Path f = modeDir.resolve(originFilename(idx, originNames[idx]));
|
||||
if (Files.exists(f) && Files.size(f) > 0) continue;
|
||||
if (skipCompleted && existingSlugs.contains(slugFromName(originNames[idx]))) {
|
||||
continue;
|
||||
}
|
||||
remaining.add(idx);
|
||||
}
|
||||
|
|
@ -255,10 +261,38 @@ public class App {
|
|||
|
||||
/** Build a filename from index + place name (index prefix prevents collisions after sanitization). */
|
||||
private static String originFilename(int index, String name) {
|
||||
String safe = name.toLowerCase()
|
||||
return String.format("%06d-%s.parquet", index, slugFromName(name));
|
||||
}
|
||||
|
||||
/** Slugify a place name: lowercase, strip non-alphanumeric (except spaces/hyphens), collapse whitespace. */
|
||||
private static String slugFromName(String name) {
|
||||
return name.toLowerCase()
|
||||
.replaceAll("[^a-z0-9 -]", "")
|
||||
.replaceAll("\\s+", "-");
|
||||
return String.format("%06d-%s.parquet", index, safe);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan a mode directory for existing non-empty parquet files, returning the set of slugs
|
||||
* (filenames with numeric prefix stripped). This allows resume to work across places.parquet
|
||||
* rebuilds where indices change but slugs stay the same.
|
||||
*/
|
||||
private static Set<String> scanExistingSlugs(Path modeDir) throws IOException {
|
||||
Set<String> slugs = new HashSet<>();
|
||||
if (!Files.isDirectory(modeDir)) return slugs;
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(modeDir, "*.parquet")) {
|
||||
for (Path p : stream) {
|
||||
if (Files.size(p) > 0) {
|
||||
String stem = p.getFileName().toString().replace(".parquet", "");
|
||||
int dash = stem.indexOf('-');
|
||||
if (dash > 0 && stem.substring(0, dash).chars().allMatch(Character::isDigit)) {
|
||||
slugs.add(stem.substring(dash + 1));
|
||||
} else {
|
||||
slugs.add(stem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return slugs;
|
||||
}
|
||||
|
||||
private static String requiredArg(String[] args, String name) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue