actually works
This commit is contained in:
parent
fb71622e40
commit
f2337dbbd0
7 changed files with 238 additions and 8 deletions
|
|
@ -1 +0,0 @@
|
||||||
{"sessionId":"ba370285-50c8-425b-bbd6-21e8273dd73e","pid":77001,"procStart":"442537909","acquiredAt":1777926837871}
|
|
||||||
|
|
@ -105,6 +105,7 @@ import { queuedCreateDeleteDoesNotHijackReusedPathTest } from "./tests/queued-cr
|
||||||
import { renamedPendingCreateReusedPathThenDeleteTest } from "./tests/renamed-pending-create-reused-path-then-delete.test";
|
import { renamedPendingCreateReusedPathThenDeleteTest } from "./tests/renamed-pending-create-reused-path-then-delete.test";
|
||||||
import { renamePendingCreateOntoPendingDeletePathTest } from "./tests/rename-pending-create-onto-pending-delete-path.test";
|
import { renamePendingCreateOntoPendingDeletePathTest } from "./tests/rename-pending-create-onto-pending-delete-path.test";
|
||||||
import { remoteQuickWriteRenameBeforeRecordTest } from "./tests/remote-quick-write-rename-before-record.test";
|
import { remoteQuickWriteRenameBeforeRecordTest } from "./tests/remote-quick-write-rename-before-record.test";
|
||||||
|
import { selfMergePendingRenameAliasesSecondCreateTest } from "./tests/self-merge-pending-rename-aliases-second-create.test";
|
||||||
|
|
||||||
export const TESTS: Partial<Record<string, TestDefinition>> = {
|
export const TESTS: Partial<Record<string, TestDefinition>> = {
|
||||||
"rename-create-conflict": renameCreateConflictTest,
|
"rename-create-conflict": renameCreateConflictTest,
|
||||||
|
|
@ -242,5 +243,7 @@ export const TESTS: Partial<Record<string, TestDefinition>> = {
|
||||||
"queued-create-delete-does-not-hijack-reused-path":
|
"queued-create-delete-does-not-hijack-reused-path":
|
||||||
queuedCreateDeleteDoesNotHijackReusedPathTest,
|
queuedCreateDeleteDoesNotHijackReusedPathTest,
|
||||||
"remote-quick-write-rename-before-record":
|
"remote-quick-write-rename-before-record":
|
||||||
remoteQuickWriteRenameBeforeRecordTest
|
remoteQuickWriteRenameBeforeRecordTest,
|
||||||
|
"self-merge-pending-rename-aliases-second-create":
|
||||||
|
selfMergePendingRenameAliasesSecondCreateTest
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,152 @@
|
||||||
|
import type { AssertableState } from "../utils/assertable-state";
|
||||||
|
import type { TestDefinition } from "../test-definition";
|
||||||
|
|
||||||
|
export const selfMergePendingRenameAliasesSecondCreateTest: TestDefinition = {
|
||||||
|
description:
|
||||||
|
"Single client makes two distinct creates that briefly share a path. " +
|
||||||
|
"Client 0 POSTs the first create at primary.md while the server is " +
|
||||||
|
"paused. While that POST is in flight: a second create is queued at " +
|
||||||
|
"staging.md, primary.md is renamed to moved.md (rewriting the in- " +
|
||||||
|
"flight create's event.path to moved.md and pushing a rename " +
|
||||||
|
"LocalUpdate at the queue tail), and staging.md is renamed onto the " +
|
||||||
|
"now-vacated primary.md slot (rewriting the second create's " +
|
||||||
|
"event.path to primary.md and pushing another rename LocalUpdate). " +
|
||||||
|
"Client 0's WS is paused throughout, so its watermark stays at 0. " +
|
||||||
|
"On resume the first POST commits Doc-X at primary.md (creation_vuid " +
|
||||||
|
"= N). The drain then processes the second LocalCreate (POST " +
|
||||||
|
"relativePath=primary.md, last_seen=0); the server's path-based " +
|
||||||
|
"dedup sees N > 0 and merges the second create into Doc-X " +
|
||||||
|
"(MergingUpdate). The buggy behaviour: processCreate's resolveCreate " +
|
||||||
|
"calls upsertRecord with localPath=primary.md, but the existing " +
|
||||||
|
"record (from the first create) already holds localPath=moved.md, " +
|
||||||
|
"and upsertRecord's `existing.localPath !== undefined` guard " +
|
||||||
|
"silently drops the new claim. The file at primary.md is left " +
|
||||||
|
"orphaned: tracked by no record, never broadcast, never deleted. " +
|
||||||
|
"After the user's renames the expected user-visible state is two " +
|
||||||
|
"distinct files at moved.md and primary.md — both clients must " +
|
||||||
|
"converge to that.",
|
||||||
|
clients: 2,
|
||||||
|
steps: [
|
||||||
|
// Both clients online so the WS connection is established before
|
||||||
|
// the test starts pausing things.
|
||||||
|
{ type: "enable-sync", client: 0 },
|
||||||
|
{ type: "enable-sync", client: 1 },
|
||||||
|
{ type: "barrier" },
|
||||||
|
|
||||||
|
// Pause client 0's WS so its MinCovered watermark stays at 0
|
||||||
|
// through the whole bug sequence. The merge condition the
|
||||||
|
// server is going to fire is `creation_vuid > last_seen`; with
|
||||||
|
// a non-zero gap the same-device second create gets merged
|
||||||
|
// into the same-device first create.
|
||||||
|
{ type: "pause-websocket", client: 0 },
|
||||||
|
|
||||||
|
// Client 1 commits a doc to push the server's vuid above 0.
|
||||||
|
// Without this filler, Doc-X's create vuid could be 1 and
|
||||||
|
// client 0's last_seen.add(1) would advance min to 1, killing
|
||||||
|
// the watermark gap that triggers the merge.
|
||||||
|
{
|
||||||
|
type: "create",
|
||||||
|
client: 1,
|
||||||
|
path: "filler.md",
|
||||||
|
content: "filler-content "
|
||||||
|
},
|
||||||
|
{ type: "sync", client: 1 },
|
||||||
|
|
||||||
|
// Pause the server so client 0's first create POST hangs in
|
||||||
|
// flight, giving us a deterministic window in which to enqueue
|
||||||
|
// the second create and the renames.
|
||||||
|
{ type: "pause-server" },
|
||||||
|
|
||||||
|
// First create — Doc-X. The wire-loop drains it, captures
|
||||||
|
// requestPath = event.path = "primary.md", reads the bytes,
|
||||||
|
// sends the POST, and stalls on the response.
|
||||||
|
{
|
||||||
|
type: "create",
|
||||||
|
client: 0,
|
||||||
|
path: "primary.md",
|
||||||
|
content: "primary content "
|
||||||
|
},
|
||||||
|
|
||||||
|
// Make sure the POST is actually on the wire with
|
||||||
|
// relativePath="primary.md" before we rewrite event.path.
|
||||||
|
// Without this delay the rename can win the race, the POST
|
||||||
|
// goes out with relativePath="moved.md", and the server-side
|
||||||
|
// path-collision merge never fires.
|
||||||
|
{ type: "sleep", ms: 100 },
|
||||||
|
|
||||||
|
// Second create at a staging path. The wire-loop is still
|
||||||
|
// blocked on Doc-X's POST, so this LocalCreate just queues at
|
||||||
|
// index 1.
|
||||||
|
{
|
||||||
|
type: "create",
|
||||||
|
client: 0,
|
||||||
|
path: "staging.md",
|
||||||
|
content: "secondary content "
|
||||||
|
},
|
||||||
|
|
||||||
|
// Rename Doc-X's path. enqueue's pending-create branch
|
||||||
|
// rewrites Doc-X's event.path in place (moved.md) and pushes
|
||||||
|
// a LocalUpdate(rename, originalPath=moved.md) at the END of
|
||||||
|
// the queue. Note the ordering: this LocalUpdate is enqueued
|
||||||
|
// AFTER the staging LocalCreate above. That ordering is
|
||||||
|
// load-bearing — it is what makes the second create's POST
|
||||||
|
// drain (and trigger the server-side merge) before Doc-X's
|
||||||
|
// rename PUT moves the doc away from primary.md on the
|
||||||
|
// server.
|
||||||
|
{
|
||||||
|
type: "rename",
|
||||||
|
client: 0,
|
||||||
|
oldPath: "primary.md",
|
||||||
|
newPath: "moved.md"
|
||||||
|
},
|
||||||
|
|
||||||
|
// Rename the staging file onto Doc-X's now-vacated primary.md
|
||||||
|
// slot. enqueue rewrites the staging LocalCreate's event.path
|
||||||
|
// to primary.md and pushes a LocalUpdate(rename,
|
||||||
|
// originalPath=primary.md) at the queue tail. After this the
|
||||||
|
// disk has: moved.md = Doc-X's bytes, primary.md = Doc-Y's
|
||||||
|
// bytes.
|
||||||
|
{
|
||||||
|
type: "rename",
|
||||||
|
client: 0,
|
||||||
|
oldPath: "staging.md",
|
||||||
|
newPath: "primary.md"
|
||||||
|
},
|
||||||
|
|
||||||
|
// Let everything fly: server processes the queued POSTs;
|
||||||
|
// client 0 catches up on broadcasts.
|
||||||
|
{ type: "resume-server" },
|
||||||
|
{ type: "resume-websocket", client: 0 },
|
||||||
|
|
||||||
|
{ type: "barrier" },
|
||||||
|
|
||||||
|
{
|
||||||
|
type: "assert-consistent",
|
||||||
|
verify: (state: AssertableState): void => {
|
||||||
|
// The user did two distinct creates (Doc-X and Doc-Y);
|
||||||
|
// both contents must survive on both clients.
|
||||||
|
state.assertFileCount(3);
|
||||||
|
state.assertFileExists("filler.md");
|
||||||
|
state.assertFileExists("moved.md");
|
||||||
|
state.assertFileExists("primary.md");
|
||||||
|
|
||||||
|
// After the renames the user expects:
|
||||||
|
// - moved.md = the file that was originally created
|
||||||
|
// at primary.md (Doc-X's content).
|
||||||
|
// - primary.md = the file that was originally created
|
||||||
|
// at staging.md (Doc-Y's content).
|
||||||
|
state.assertContains("moved.md", "primary content");
|
||||||
|
state.assertContains("primary.md", "secondary content");
|
||||||
|
|
||||||
|
// No content cross-contamination: each contribution
|
||||||
|
// should land in exactly one of the user-visible
|
||||||
|
// files. Under the bug, the orphan at primary.md
|
||||||
|
// carries Doc-X's content (because Doc-Y's PUT was
|
||||||
|
// aliased onto Doc-X's record and read Doc-X's bytes
|
||||||
|
// from moved.md), so this catches the leak too.
|
||||||
|
state.assertContentInAtMostOneFile("primary content");
|
||||||
|
state.assertContentInAtMostOneFile("secondary content");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
};
|
||||||
|
|
@ -5,6 +5,7 @@ import type { FileOperations } from "../file-operations/file-operations";
|
||||||
import { findMatchingFile } from "../utils/find-matching-file";
|
import { findMatchingFile } from "../utils/find-matching-file";
|
||||||
import type { SyncEventQueue } from "./sync-event-queue";
|
import type { SyncEventQueue } from "./sync-event-queue";
|
||||||
import { removeFromArray } from "../utils/remove-from-array";
|
import { removeFromArray } from "../utils/remove-from-array";
|
||||||
|
import { FileNotFoundError } from "../errors/file-not-found-error";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scans the local filesystem and the document database to determine
|
* Scans the local filesystem and the document database to determine
|
||||||
|
|
@ -77,8 +78,26 @@ export async function scheduleOfflineChanges(
|
||||||
}
|
}
|
||||||
|
|
||||||
const renamedPaths = new Set<RelativePath>();
|
const renamedPaths = new Set<RelativePath>();
|
||||||
|
// Track paths that were in `allLocalFiles` at scan-start but have
|
||||||
|
// since disappeared. The scan awaits between `listFilesRecursively`
|
||||||
|
// and each `read`, so a concurrent delete (slow file events, real
|
||||||
|
// user activity) can vacate a slot mid-scan. Throwing would abort
|
||||||
|
// the whole scan; nothing to sync for a file that's already gone.
|
||||||
|
const disappearedPaths = new Set<RelativePath>();
|
||||||
for (const path of locallyPossibleCreatedFiles) {
|
for (const path of locallyPossibleCreatedFiles) {
|
||||||
const content = await operations.read(path);
|
let content: Uint8Array;
|
||||||
|
try {
|
||||||
|
content = await operations.read(path);
|
||||||
|
} catch (e) {
|
||||||
|
if (e instanceof FileNotFoundError) {
|
||||||
|
logger.debug(
|
||||||
|
`File ${path} disappeared before offline-scan could read it; skipping`
|
||||||
|
);
|
||||||
|
disappearedPaths.add(path);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
const contentHash = await hash(content);
|
const contentHash = await hash(content);
|
||||||
|
|
||||||
const matchingDeletedFile = await findMatchingFile(
|
const matchingDeletedFile = await findMatchingFile(
|
||||||
|
|
@ -105,7 +124,7 @@ export async function scheduleOfflineChanges(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const path of locallyPossibleCreatedFiles) {
|
for (const path of locallyPossibleCreatedFiles) {
|
||||||
if (renamedPaths.has(path)) {
|
if (renamedPaths.has(path) || disappearedPaths.has(path)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -189,6 +189,52 @@ export class SyncEventQueue {
|
||||||
this._lastSeenUpdateId.add(id);
|
this._lastSeenUpdateId.add(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Watermark to send with our own `POST /documents` requests.
|
||||||
|
*
|
||||||
|
* The contiguous-prefix `lastSeenUpdateId` lags behind reality whenever
|
||||||
|
* there are gaps in the vuid stream we've observed: if the server has
|
||||||
|
* committed vuids 1..N from various clients but we've only processed
|
||||||
|
* a non-contiguous subset, `min` stays at the last hole. The server's
|
||||||
|
* create handler reads this watermark to decide whether to merge a
|
||||||
|
* new POST into an existing doc at the same path:
|
||||||
|
*
|
||||||
|
* creation_vault_update_id > last_seen_vault_update_id → merge
|
||||||
|
*
|
||||||
|
* That check is meant to fire only for docs the client genuinely
|
||||||
|
* couldn't have known about. But on a same-device "rename a
|
||||||
|
* pending-create away then create something else at that path" race,
|
||||||
|
* the second POST went out with `last_seen = min` while we already
|
||||||
|
* held a record for the first create at vuid=N — and the server
|
||||||
|
* happily merged the second create into our own doc, aliasing two
|
||||||
|
* physically distinct local files onto a single docId.
|
||||||
|
*
|
||||||
|
* The fix is path-scoped: if we already track a doc whose
|
||||||
|
* `remoteRelativePath` matches the path we're about to POST, the
|
||||||
|
* server's existing doc at that path is exactly the one we'd alias
|
||||||
|
* into. Bumping `last_seen` to that record's `parentVersionId`
|
||||||
|
* forces the server's `creation_vuid > last_seen` check to fail and
|
||||||
|
* fall through to the deconflict path. For paths we don't yet
|
||||||
|
* track, we send the regular `min` watermark — so a legitimate
|
||||||
|
* cross-device merge (two clients independently creating the same
|
||||||
|
* path) still fires when neither side holds a record for the
|
||||||
|
* collision target.
|
||||||
|
*/
|
||||||
|
public lastSeenUpdateIdForCreate(
|
||||||
|
requestPath: RelativePath
|
||||||
|
): VaultUpdateId {
|
||||||
|
let watermark = this._lastSeenUpdateId.min;
|
||||||
|
for (const record of this.byDocId.values()) {
|
||||||
|
if (
|
||||||
|
record.remoteRelativePath === requestPath &&
|
||||||
|
record.parentVersionId > watermark
|
||||||
|
) {
|
||||||
|
watermark = record.parentVersionId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return watermark;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pin an additional ignore pattern that survives setting reloads. Used
|
* Pin an additional ignore pattern that survives setting reloads. Used
|
||||||
* by the Syncer to hide internal scratch paths (e.g. `.vaultlink/**`
|
* by the Syncer to hide internal scratch paths (e.g. `.vaultlink/**`
|
||||||
|
|
|
||||||
|
|
@ -500,9 +500,16 @@ export class Syncer {
|
||||||
// `updatePendingCreatePath` mutates queued creates when a not-yet-sent
|
// `updatePendingCreatePath` mutates queued creates when a not-yet-sent
|
||||||
// local file is renamed, so a renamed-away generation does not create
|
// local file is renamed, so a renamed-away generation does not create
|
||||||
// a server document at a path that a newer local file has reused.
|
// a server document at a path that a newer local file has reused.
|
||||||
|
//
|
||||||
|
// `lastSeenUpdateIdForCreate(requestPath)` (rather than the contiguous
|
||||||
|
// `lastSeenUpdateId`) blocks the server from path-merging this POST
|
||||||
|
// into a doc we already track at the same path. Without that, a
|
||||||
|
// same-device rename race can alias two physically distinct local
|
||||||
|
// files onto one docId. See `SyncEventQueue.lastSeenUpdateIdForCreate`.
|
||||||
const response = await this.syncService.create({
|
const response = await this.syncService.create({
|
||||||
relativePath: requestPath,
|
relativePath: requestPath,
|
||||||
lastSeenVaultUpdateId: this.queue.lastSeenUpdateId,
|
lastSeenVaultUpdateId:
|
||||||
|
this.queue.lastSeenUpdateIdForCreate(requestPath),
|
||||||
contentBytes
|
contentBytes
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,9 +41,12 @@ echo "Server started with PID: $server_pid"
|
||||||
|
|
||||||
# Ensure server is killed on script exit
|
# Ensure server is killed on script exit
|
||||||
cleanup_server() {
|
cleanup_server() {
|
||||||
echo "Stopping server (PID: $server_pid)..."
|
if [ -n "$server_pid" ]; then
|
||||||
kill $server_pid 2>/dev/null || true
|
echo "Stopping server (PID: $server_pid)..."
|
||||||
wait $server_pid 2>/dev/null || true
|
kill $server_pid 2>/dev/null || true
|
||||||
|
wait $server_pid 2>/dev/null || true
|
||||||
|
server_pid=""
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
trap cleanup_server EXIT
|
trap cleanup_server EXIT
|
||||||
|
|
||||||
|
|
@ -127,6 +130,7 @@ while true; do
|
||||||
done
|
done
|
||||||
|
|
||||||
if $all_done; then
|
if $all_done; then
|
||||||
|
cleanup_server
|
||||||
echo "All processes completed successfully"
|
echo "All processes completed successfully"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue