actually works
This commit is contained in:
parent
fb71622e40
commit
f2337dbbd0
7 changed files with 238 additions and 8 deletions
|
|
@ -1 +0,0 @@
|
|||
{"sessionId":"ba370285-50c8-425b-bbd6-21e8273dd73e","pid":77001,"procStart":"442537909","acquiredAt":1777926837871}
|
||||
|
|
@ -105,6 +105,7 @@ import { queuedCreateDeleteDoesNotHijackReusedPathTest } from "./tests/queued-cr
|
|||
import { renamedPendingCreateReusedPathThenDeleteTest } from "./tests/renamed-pending-create-reused-path-then-delete.test";
|
||||
import { renamePendingCreateOntoPendingDeletePathTest } from "./tests/rename-pending-create-onto-pending-delete-path.test";
|
||||
import { remoteQuickWriteRenameBeforeRecordTest } from "./tests/remote-quick-write-rename-before-record.test";
|
||||
import { selfMergePendingRenameAliasesSecondCreateTest } from "./tests/self-merge-pending-rename-aliases-second-create.test";
|
||||
|
||||
export const TESTS: Partial<Record<string, TestDefinition>> = {
|
||||
"rename-create-conflict": renameCreateConflictTest,
|
||||
|
|
@ -242,5 +243,7 @@ export const TESTS: Partial<Record<string, TestDefinition>> = {
|
|||
"queued-create-delete-does-not-hijack-reused-path":
|
||||
queuedCreateDeleteDoesNotHijackReusedPathTest,
|
||||
"remote-quick-write-rename-before-record":
|
||||
remoteQuickWriteRenameBeforeRecordTest
|
||||
remoteQuickWriteRenameBeforeRecordTest,
|
||||
"self-merge-pending-rename-aliases-second-create":
|
||||
selfMergePendingRenameAliasesSecondCreateTest
|
||||
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
import type { AssertableState } from "../utils/assertable-state";
|
||||
import type { TestDefinition } from "../test-definition";
|
||||
|
||||
export const selfMergePendingRenameAliasesSecondCreateTest: TestDefinition = {
|
||||
description:
|
||||
"Single client makes two distinct creates that briefly share a path. " +
|
||||
"Client 0 POSTs the first create at primary.md while the server is " +
|
||||
"paused. While that POST is in flight: a second create is queued at " +
|
||||
"staging.md, primary.md is renamed to moved.md (rewriting the in- " +
|
||||
"flight create's event.path to moved.md and pushing a rename " +
|
||||
"LocalUpdate at the queue tail), and staging.md is renamed onto the " +
|
||||
"now-vacated primary.md slot (rewriting the second create's " +
|
||||
"event.path to primary.md and pushing another rename LocalUpdate). " +
|
||||
"Client 0's WS is paused throughout, so its watermark stays at 0. " +
|
||||
"On resume the first POST commits Doc-X at primary.md (creation_vuid " +
|
||||
"= N). The drain then processes the second LocalCreate (POST " +
|
||||
"relativePath=primary.md, last_seen=0); the server's path-based " +
|
||||
"dedup sees N > 0 and merges the second create into Doc-X " +
|
||||
"(MergingUpdate). The buggy behaviour: processCreate's resolveCreate " +
|
||||
"calls upsertRecord with localPath=primary.md, but the existing " +
|
||||
"record (from the first create) already holds localPath=moved.md, " +
|
||||
"and upsertRecord's `existing.localPath !== undefined` guard " +
|
||||
"silently drops the new claim. The file at primary.md is left " +
|
||||
"orphaned: tracked by no record, never broadcast, never deleted. " +
|
||||
"After the user's renames the expected user-visible state is two " +
|
||||
"distinct files at moved.md and primary.md — both clients must " +
|
||||
"converge to that.",
|
||||
clients: 2,
|
||||
steps: [
|
||||
// Both clients online so the WS connection is established before
|
||||
// the test starts pausing things.
|
||||
{ type: "enable-sync", client: 0 },
|
||||
{ type: "enable-sync", client: 1 },
|
||||
{ type: "barrier" },
|
||||
|
||||
// Pause client 0's WS so its MinCovered watermark stays at 0
|
||||
// through the whole bug sequence. The merge condition the
|
||||
// server is going to fire is `creation_vuid > last_seen`; with
|
||||
// a non-zero gap the same-device second create gets merged
|
||||
// into the same-device first create.
|
||||
{ type: "pause-websocket", client: 0 },
|
||||
|
||||
// Client 1 commits a doc to push the server's vuid above 0.
|
||||
// Without this filler, Doc-X's create vuid could be 1 and
|
||||
// client 0's last_seen.add(1) would advance min to 1, killing
|
||||
// the watermark gap that triggers the merge.
|
||||
{
|
||||
type: "create",
|
||||
client: 1,
|
||||
path: "filler.md",
|
||||
content: "filler-content "
|
||||
},
|
||||
{ type: "sync", client: 1 },
|
||||
|
||||
// Pause the server so client 0's first create POST hangs in
|
||||
// flight, giving us a deterministic window in which to enqueue
|
||||
// the second create and the renames.
|
||||
{ type: "pause-server" },
|
||||
|
||||
// First create — Doc-X. The wire-loop drains it, captures
|
||||
// requestPath = event.path = "primary.md", reads the bytes,
|
||||
// sends the POST, and stalls on the response.
|
||||
{
|
||||
type: "create",
|
||||
client: 0,
|
||||
path: "primary.md",
|
||||
content: "primary content "
|
||||
},
|
||||
|
||||
// Make sure the POST is actually on the wire with
|
||||
// relativePath="primary.md" before we rewrite event.path.
|
||||
// Without this delay the rename can win the race, the POST
|
||||
// goes out with relativePath="moved.md", and the server-side
|
||||
// path-collision merge never fires.
|
||||
{ type: "sleep", ms: 100 },
|
||||
|
||||
// Second create at a staging path. The wire-loop is still
|
||||
// blocked on Doc-X's POST, so this LocalCreate just queues at
|
||||
// index 1.
|
||||
{
|
||||
type: "create",
|
||||
client: 0,
|
||||
path: "staging.md",
|
||||
content: "secondary content "
|
||||
},
|
||||
|
||||
// Rename Doc-X's path. enqueue's pending-create branch
|
||||
// rewrites Doc-X's event.path in place (moved.md) and pushes
|
||||
// a LocalUpdate(rename, originalPath=moved.md) at the END of
|
||||
// the queue. Note the ordering: this LocalUpdate is enqueued
|
||||
// AFTER the staging LocalCreate above. That ordering is
|
||||
// load-bearing — it is what makes the second create's POST
|
||||
// drain (and trigger the server-side merge) before Doc-X's
|
||||
// rename PUT moves the doc away from primary.md on the
|
||||
// server.
|
||||
{
|
||||
type: "rename",
|
||||
client: 0,
|
||||
oldPath: "primary.md",
|
||||
newPath: "moved.md"
|
||||
},
|
||||
|
||||
// Rename the staging file onto Doc-X's now-vacated primary.md
|
||||
// slot. enqueue rewrites the staging LocalCreate's event.path
|
||||
// to primary.md and pushes a LocalUpdate(rename,
|
||||
// originalPath=primary.md) at the queue tail. After this the
|
||||
// disk has: moved.md = Doc-X's bytes, primary.md = Doc-Y's
|
||||
// bytes.
|
||||
{
|
||||
type: "rename",
|
||||
client: 0,
|
||||
oldPath: "staging.md",
|
||||
newPath: "primary.md"
|
||||
},
|
||||
|
||||
// Let everything fly: server processes the queued POSTs;
|
||||
// client 0 catches up on broadcasts.
|
||||
{ type: "resume-server" },
|
||||
{ type: "resume-websocket", client: 0 },
|
||||
|
||||
{ type: "barrier" },
|
||||
|
||||
{
|
||||
type: "assert-consistent",
|
||||
verify: (state: AssertableState): void => {
|
||||
// The user did two distinct creates (Doc-X and Doc-Y);
|
||||
// both contents must survive on both clients.
|
||||
state.assertFileCount(3);
|
||||
state.assertFileExists("filler.md");
|
||||
state.assertFileExists("moved.md");
|
||||
state.assertFileExists("primary.md");
|
||||
|
||||
// After the renames the user expects:
|
||||
// - moved.md = the file that was originally created
|
||||
// at primary.md (Doc-X's content).
|
||||
// - primary.md = the file that was originally created
|
||||
// at staging.md (Doc-Y's content).
|
||||
state.assertContains("moved.md", "primary content");
|
||||
state.assertContains("primary.md", "secondary content");
|
||||
|
||||
// No content cross-contamination: each contribution
|
||||
// should land in exactly one of the user-visible
|
||||
// files. Under the bug, the orphan at primary.md
|
||||
// carries Doc-X's content (because Doc-Y's PUT was
|
||||
// aliased onto Doc-X's record and read Doc-X's bytes
|
||||
// from moved.md), so this catches the leak too.
|
||||
state.assertContentInAtMostOneFile("primary content");
|
||||
state.assertContentInAtMostOneFile("secondary content");
|
||||
}
|
||||
}
|
||||
]
|
||||
};
|
||||
|
|
@ -5,6 +5,7 @@ import type { FileOperations } from "../file-operations/file-operations";
|
|||
import { findMatchingFile } from "../utils/find-matching-file";
|
||||
import type { SyncEventQueue } from "./sync-event-queue";
|
||||
import { removeFromArray } from "../utils/remove-from-array";
|
||||
import { FileNotFoundError } from "../errors/file-not-found-error";
|
||||
|
||||
/**
|
||||
* Scans the local filesystem and the document database to determine
|
||||
|
|
@ -77,8 +78,26 @@ export async function scheduleOfflineChanges(
|
|||
}
|
||||
|
||||
const renamedPaths = new Set<RelativePath>();
|
||||
// Track paths that were in `allLocalFiles` at scan-start but have
|
||||
// since disappeared. The scan awaits between `listFilesRecursively`
|
||||
// and each `read`, so a concurrent delete (slow file events, real
|
||||
// user activity) can vacate a slot mid-scan. Throwing would abort
|
||||
// the whole scan; nothing to sync for a file that's already gone.
|
||||
const disappearedPaths = new Set<RelativePath>();
|
||||
for (const path of locallyPossibleCreatedFiles) {
|
||||
const content = await operations.read(path);
|
||||
let content: Uint8Array;
|
||||
try {
|
||||
content = await operations.read(path);
|
||||
} catch (e) {
|
||||
if (e instanceof FileNotFoundError) {
|
||||
logger.debug(
|
||||
`File ${path} disappeared before offline-scan could read it; skipping`
|
||||
);
|
||||
disappearedPaths.add(path);
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
const contentHash = await hash(content);
|
||||
|
||||
const matchingDeletedFile = await findMatchingFile(
|
||||
|
|
@ -105,7 +124,7 @@ export async function scheduleOfflineChanges(
|
|||
}
|
||||
|
||||
for (const path of locallyPossibleCreatedFiles) {
|
||||
if (renamedPaths.has(path)) {
|
||||
if (renamedPaths.has(path) || disappearedPaths.has(path)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -189,6 +189,52 @@ export class SyncEventQueue {
|
|||
this._lastSeenUpdateId.add(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Watermark to send with our own `POST /documents` requests.
|
||||
*
|
||||
* The contiguous-prefix `lastSeenUpdateId` lags behind reality whenever
|
||||
* there are gaps in the vuid stream we've observed: if the server has
|
||||
* committed vuids 1..N from various clients but we've only processed
|
||||
* a non-contiguous subset, `min` stays at the last hole. The server's
|
||||
* create handler reads this watermark to decide whether to merge a
|
||||
* new POST into an existing doc at the same path:
|
||||
*
|
||||
* creation_vault_update_id > last_seen_vault_update_id → merge
|
||||
*
|
||||
* That check is meant to fire only for docs the client genuinely
|
||||
* couldn't have known about. But on a same-device "rename a
|
||||
* pending-create away then create something else at that path" race,
|
||||
* the second POST went out with `last_seen = min` while we already
|
||||
* held a record for the first create at vuid=N — and the server
|
||||
* happily merged the second create into our own doc, aliasing two
|
||||
* physically distinct local files onto a single docId.
|
||||
*
|
||||
* The fix is path-scoped: if we already track a doc whose
|
||||
* `remoteRelativePath` matches the path we're about to POST, the
|
||||
* server's existing doc at that path is exactly the one we'd alias
|
||||
* into. Bumping `last_seen` to that record's `parentVersionId`
|
||||
* forces the server's `creation_vuid > last_seen` check to fail and
|
||||
* fall through to the deconflict path. For paths we don't yet
|
||||
* track, we send the regular `min` watermark — so a legitimate
|
||||
* cross-device merge (two clients independently creating the same
|
||||
* path) still fires when neither side holds a record for the
|
||||
* collision target.
|
||||
*/
|
||||
public lastSeenUpdateIdForCreate(
|
||||
requestPath: RelativePath
|
||||
): VaultUpdateId {
|
||||
let watermark = this._lastSeenUpdateId.min;
|
||||
for (const record of this.byDocId.values()) {
|
||||
if (
|
||||
record.remoteRelativePath === requestPath &&
|
||||
record.parentVersionId > watermark
|
||||
) {
|
||||
watermark = record.parentVersionId;
|
||||
}
|
||||
}
|
||||
return watermark;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pin an additional ignore pattern that survives setting reloads. Used
|
||||
* by the Syncer to hide internal scratch paths (e.g. `.vaultlink/**`
|
||||
|
|
|
|||
|
|
@ -500,9 +500,16 @@ export class Syncer {
|
|||
// `updatePendingCreatePath` mutates queued creates when a not-yet-sent
|
||||
// local file is renamed, so a renamed-away generation does not create
|
||||
// a server document at a path that a newer local file has reused.
|
||||
//
|
||||
// `lastSeenUpdateIdForCreate(requestPath)` (rather than the contiguous
|
||||
// `lastSeenUpdateId`) blocks the server from path-merging this POST
|
||||
// into a doc we already track at the same path. Without that, a
|
||||
// same-device rename race can alias two physically distinct local
|
||||
// files onto one docId. See `SyncEventQueue.lastSeenUpdateIdForCreate`.
|
||||
const response = await this.syncService.create({
|
||||
relativePath: requestPath,
|
||||
lastSeenVaultUpdateId: this.queue.lastSeenUpdateId,
|
||||
lastSeenVaultUpdateId:
|
||||
this.queue.lastSeenUpdateIdForCreate(requestPath),
|
||||
contentBytes
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -41,9 +41,12 @@ echo "Server started with PID: $server_pid"
|
|||
|
||||
# Ensure server is killed on script exit
|
||||
cleanup_server() {
|
||||
if [ -n "$server_pid" ]; then
|
||||
echo "Stopping server (PID: $server_pid)..."
|
||||
kill $server_pid 2>/dev/null || true
|
||||
wait $server_pid 2>/dev/null || true
|
||||
server_pid=""
|
||||
fi
|
||||
}
|
||||
trap cleanup_server EXIT
|
||||
|
||||
|
|
@ -127,6 +130,7 @@ while true; do
|
|||
done
|
||||
|
||||
if $all_done; then
|
||||
cleanup_server
|
||||
echo "All processes completed successfully"
|
||||
exit 0
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue