diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock deleted file mode 100644 index fa24d1c1..00000000 --- a/.claude/scheduled_tasks.lock +++ /dev/null @@ -1 +0,0 @@ -{"sessionId":"ba370285-50c8-425b-bbd6-21e8273dd73e","pid":77001,"procStart":"442537909","acquiredAt":1777926837871} \ No newline at end of file diff --git a/frontend/deterministic-tests/src/test-registry.ts b/frontend/deterministic-tests/src/test-registry.ts index 6436292f..ed4fe026 100644 --- a/frontend/deterministic-tests/src/test-registry.ts +++ b/frontend/deterministic-tests/src/test-registry.ts @@ -105,6 +105,7 @@ import { queuedCreateDeleteDoesNotHijackReusedPathTest } from "./tests/queued-cr import { renamedPendingCreateReusedPathThenDeleteTest } from "./tests/renamed-pending-create-reused-path-then-delete.test"; import { renamePendingCreateOntoPendingDeletePathTest } from "./tests/rename-pending-create-onto-pending-delete-path.test"; import { remoteQuickWriteRenameBeforeRecordTest } from "./tests/remote-quick-write-rename-before-record.test"; +import { selfMergePendingRenameAliasesSecondCreateTest } from "./tests/self-merge-pending-rename-aliases-second-create.test"; export const TESTS: Partial> = { "rename-create-conflict": renameCreateConflictTest, @@ -242,5 +243,7 @@ export const TESTS: Partial> = { "queued-create-delete-does-not-hijack-reused-path": queuedCreateDeleteDoesNotHijackReusedPathTest, "remote-quick-write-rename-before-record": - remoteQuickWriteRenameBeforeRecordTest + remoteQuickWriteRenameBeforeRecordTest, + "self-merge-pending-rename-aliases-second-create": + selfMergePendingRenameAliasesSecondCreateTest }; diff --git a/frontend/deterministic-tests/src/tests/self-merge-pending-rename-aliases-second-create.test.ts b/frontend/deterministic-tests/src/tests/self-merge-pending-rename-aliases-second-create.test.ts new file mode 100644 index 00000000..ac8ed3ed --- /dev/null +++ b/frontend/deterministic-tests/src/tests/self-merge-pending-rename-aliases-second-create.test.ts @@ -0,0 +1,152 @@ +import type { AssertableState } from "../utils/assertable-state"; +import type { TestDefinition } from "../test-definition"; + +export const selfMergePendingRenameAliasesSecondCreateTest: TestDefinition = { + description: + "Single client makes two distinct creates that briefly share a path. " + + "Client 0 POSTs the first create at primary.md while the server is " + + "paused. While that POST is in flight: a second create is queued at " + + "staging.md, primary.md is renamed to moved.md (rewriting the in- " + + "flight create's event.path to moved.md and pushing a rename " + + "LocalUpdate at the queue tail), and staging.md is renamed onto the " + + "now-vacated primary.md slot (rewriting the second create's " + + "event.path to primary.md and pushing another rename LocalUpdate). " + + "Client 0's WS is paused throughout, so its watermark stays at 0. " + + "On resume the first POST commits Doc-X at primary.md (creation_vuid " + + "= N). The drain then processes the second LocalCreate (POST " + + "relativePath=primary.md, last_seen=0); the server's path-based " + + "dedup sees N > 0 and merges the second create into Doc-X " + + "(MergingUpdate). The buggy behaviour: processCreate's resolveCreate " + + "calls upsertRecord with localPath=primary.md, but the existing " + + "record (from the first create) already holds localPath=moved.md, " + + "and upsertRecord's `existing.localPath !== undefined` guard " + + "silently drops the new claim. The file at primary.md is left " + + "orphaned: tracked by no record, never broadcast, never deleted. " + + "After the user's renames the expected user-visible state is two " + + "distinct files at moved.md and primary.md — both clients must " + + "converge to that.", + clients: 2, + steps: [ + // Both clients online so the WS connection is established before + // the test starts pausing things. + { type: "enable-sync", client: 0 }, + { type: "enable-sync", client: 1 }, + { type: "barrier" }, + + // Pause client 0's WS so its MinCovered watermark stays at 0 + // through the whole bug sequence. The merge condition the + // server is going to fire is `creation_vuid > last_seen`; with + // a non-zero gap the same-device second create gets merged + // into the same-device first create. + { type: "pause-websocket", client: 0 }, + + // Client 1 commits a doc to push the server's vuid above 0. + // Without this filler, Doc-X's create vuid could be 1 and + // client 0's last_seen.add(1) would advance min to 1, killing + // the watermark gap that triggers the merge. + { + type: "create", + client: 1, + path: "filler.md", + content: "filler-content " + }, + { type: "sync", client: 1 }, + + // Pause the server so client 0's first create POST hangs in + // flight, giving us a deterministic window in which to enqueue + // the second create and the renames. + { type: "pause-server" }, + + // First create — Doc-X. The wire-loop drains it, captures + // requestPath = event.path = "primary.md", reads the bytes, + // sends the POST, and stalls on the response. + { + type: "create", + client: 0, + path: "primary.md", + content: "primary content " + }, + + // Make sure the POST is actually on the wire with + // relativePath="primary.md" before we rewrite event.path. + // Without this delay the rename can win the race, the POST + // goes out with relativePath="moved.md", and the server-side + // path-collision merge never fires. + { type: "sleep", ms: 100 }, + + // Second create at a staging path. The wire-loop is still + // blocked on Doc-X's POST, so this LocalCreate just queues at + // index 1. + { + type: "create", + client: 0, + path: "staging.md", + content: "secondary content " + }, + + // Rename Doc-X's path. enqueue's pending-create branch + // rewrites Doc-X's event.path in place (moved.md) and pushes + // a LocalUpdate(rename, originalPath=moved.md) at the END of + // the queue. Note the ordering: this LocalUpdate is enqueued + // AFTER the staging LocalCreate above. That ordering is + // load-bearing — it is what makes the second create's POST + // drain (and trigger the server-side merge) before Doc-X's + // rename PUT moves the doc away from primary.md on the + // server. + { + type: "rename", + client: 0, + oldPath: "primary.md", + newPath: "moved.md" + }, + + // Rename the staging file onto Doc-X's now-vacated primary.md + // slot. enqueue rewrites the staging LocalCreate's event.path + // to primary.md and pushes a LocalUpdate(rename, + // originalPath=primary.md) at the queue tail. After this the + // disk has: moved.md = Doc-X's bytes, primary.md = Doc-Y's + // bytes. + { + type: "rename", + client: 0, + oldPath: "staging.md", + newPath: "primary.md" + }, + + // Let everything fly: server processes the queued POSTs; + // client 0 catches up on broadcasts. + { type: "resume-server" }, + { type: "resume-websocket", client: 0 }, + + { type: "barrier" }, + + { + type: "assert-consistent", + verify: (state: AssertableState): void => { + // The user did two distinct creates (Doc-X and Doc-Y); + // both contents must survive on both clients. + state.assertFileCount(3); + state.assertFileExists("filler.md"); + state.assertFileExists("moved.md"); + state.assertFileExists("primary.md"); + + // After the renames the user expects: + // - moved.md = the file that was originally created + // at primary.md (Doc-X's content). + // - primary.md = the file that was originally created + // at staging.md (Doc-Y's content). + state.assertContains("moved.md", "primary content"); + state.assertContains("primary.md", "secondary content"); + + // No content cross-contamination: each contribution + // should land in exactly one of the user-visible + // files. Under the bug, the orphan at primary.md + // carries Doc-X's content (because Doc-Y's PUT was + // aliased onto Doc-X's record and read Doc-X's bytes + // from moved.md), so this catches the leak too. + state.assertContentInAtMostOneFile("primary content"); + state.assertContentInAtMostOneFile("secondary content"); + } + } + ] +}; diff --git a/frontend/sync-client/src/sync-operations/offline-change-detector.ts b/frontend/sync-client/src/sync-operations/offline-change-detector.ts index 134e152c..5b91e782 100644 --- a/frontend/sync-client/src/sync-operations/offline-change-detector.ts +++ b/frontend/sync-client/src/sync-operations/offline-change-detector.ts @@ -5,6 +5,7 @@ import type { FileOperations } from "../file-operations/file-operations"; import { findMatchingFile } from "../utils/find-matching-file"; import type { SyncEventQueue } from "./sync-event-queue"; import { removeFromArray } from "../utils/remove-from-array"; +import { FileNotFoundError } from "../errors/file-not-found-error"; /** * Scans the local filesystem and the document database to determine @@ -77,8 +78,26 @@ export async function scheduleOfflineChanges( } const renamedPaths = new Set(); + // Track paths that were in `allLocalFiles` at scan-start but have + // since disappeared. The scan awaits between `listFilesRecursively` + // and each `read`, so a concurrent delete (slow file events, real + // user activity) can vacate a slot mid-scan. Throwing would abort + // the whole scan; nothing to sync for a file that's already gone. + const disappearedPaths = new Set(); for (const path of locallyPossibleCreatedFiles) { - const content = await operations.read(path); + let content: Uint8Array; + try { + content = await operations.read(path); + } catch (e) { + if (e instanceof FileNotFoundError) { + logger.debug( + `File ${path} disappeared before offline-scan could read it; skipping` + ); + disappearedPaths.add(path); + continue; + } + throw e; + } const contentHash = await hash(content); const matchingDeletedFile = await findMatchingFile( @@ -105,7 +124,7 @@ export async function scheduleOfflineChanges( } for (const path of locallyPossibleCreatedFiles) { - if (renamedPaths.has(path)) { + if (renamedPaths.has(path) || disappearedPaths.has(path)) { continue; } diff --git a/frontend/sync-client/src/sync-operations/sync-event-queue.ts b/frontend/sync-client/src/sync-operations/sync-event-queue.ts index 2f07355f..75f675d0 100644 --- a/frontend/sync-client/src/sync-operations/sync-event-queue.ts +++ b/frontend/sync-client/src/sync-operations/sync-event-queue.ts @@ -189,6 +189,52 @@ export class SyncEventQueue { this._lastSeenUpdateId.add(id); } + /** + * Watermark to send with our own `POST /documents` requests. + * + * The contiguous-prefix `lastSeenUpdateId` lags behind reality whenever + * there are gaps in the vuid stream we've observed: if the server has + * committed vuids 1..N from various clients but we've only processed + * a non-contiguous subset, `min` stays at the last hole. The server's + * create handler reads this watermark to decide whether to merge a + * new POST into an existing doc at the same path: + * + * creation_vault_update_id > last_seen_vault_update_id → merge + * + * That check is meant to fire only for docs the client genuinely + * couldn't have known about. But on a same-device "rename a + * pending-create away then create something else at that path" race, + * the second POST went out with `last_seen = min` while we already + * held a record for the first create at vuid=N — and the server + * happily merged the second create into our own doc, aliasing two + * physically distinct local files onto a single docId. + * + * The fix is path-scoped: if we already track a doc whose + * `remoteRelativePath` matches the path we're about to POST, the + * server's existing doc at that path is exactly the one we'd alias + * into. Bumping `last_seen` to that record's `parentVersionId` + * forces the server's `creation_vuid > last_seen` check to fail and + * fall through to the deconflict path. For paths we don't yet + * track, we send the regular `min` watermark — so a legitimate + * cross-device merge (two clients independently creating the same + * path) still fires when neither side holds a record for the + * collision target. + */ + public lastSeenUpdateIdForCreate( + requestPath: RelativePath + ): VaultUpdateId { + let watermark = this._lastSeenUpdateId.min; + for (const record of this.byDocId.values()) { + if ( + record.remoteRelativePath === requestPath && + record.parentVersionId > watermark + ) { + watermark = record.parentVersionId; + } + } + return watermark; + } + /** * Pin an additional ignore pattern that survives setting reloads. Used * by the Syncer to hide internal scratch paths (e.g. `.vaultlink/**` diff --git a/frontend/sync-client/src/sync-operations/syncer.ts b/frontend/sync-client/src/sync-operations/syncer.ts index f63894de..4e908600 100644 --- a/frontend/sync-client/src/sync-operations/syncer.ts +++ b/frontend/sync-client/src/sync-operations/syncer.ts @@ -500,9 +500,16 @@ export class Syncer { // `updatePendingCreatePath` mutates queued creates when a not-yet-sent // local file is renamed, so a renamed-away generation does not create // a server document at a path that a newer local file has reused. + // + // `lastSeenUpdateIdForCreate(requestPath)` (rather than the contiguous + // `lastSeenUpdateId`) blocks the server from path-merging this POST + // into a doc we already track at the same path. Without that, a + // same-device rename race can alias two physically distinct local + // files onto one docId. See `SyncEventQueue.lastSeenUpdateIdForCreate`. const response = await this.syncService.create({ relativePath: requestPath, - lastSeenVaultUpdateId: this.queue.lastSeenUpdateId, + lastSeenVaultUpdateId: + this.queue.lastSeenUpdateIdForCreate(requestPath), contentBytes }); diff --git a/scripts/e2e.sh b/scripts/e2e.sh index d3ebefb2..7ab8d90c 100755 --- a/scripts/e2e.sh +++ b/scripts/e2e.sh @@ -41,9 +41,12 @@ echo "Server started with PID: $server_pid" # Ensure server is killed on script exit cleanup_server() { - echo "Stopping server (PID: $server_pid)..." - kill $server_pid 2>/dev/null || true - wait $server_pid 2>/dev/null || true + if [ -n "$server_pid" ]; then + echo "Stopping server (PID: $server_pid)..." + kill $server_pid 2>/dev/null || true + wait $server_pid 2>/dev/null || true + server_pid="" + fi } trap cleanup_server EXIT @@ -127,6 +130,7 @@ while true; do done if $all_done; then + cleanup_server echo "All processes completed successfully" exit 0 fi