Fix slow commit bug
This commit is contained in:
parent
eb23f445d0
commit
0329fc29f2
7 changed files with 88 additions and 40 deletions
|
|
@ -103,6 +103,7 @@ import { renamedPendingCreateReusedPathThenDeleteTest } from "./tests/renamed-pe
|
|||
import { renamePendingCreateOntoPendingDeletePathTest } from "./tests/rename-pending-create-onto-pending-delete-path.test";
|
||||
import { remoteQuickWriteRenameBeforeRecordTest } from "./tests/remote-quick-write-rename-before-record.test";
|
||||
import { selfMergePendingRenameAliasesSecondCreateTest } from "./tests/self-merge-pending-rename-aliases-second-create.test";
|
||||
import { disableMidCreateThenDeleteTest } from "./tests/disable-mid-create-then-delete.test";
|
||||
|
||||
export const TESTS: Partial<Record<string, TestDefinition>> = {
|
||||
"rename-create-conflict": renameCreateConflictTest,
|
||||
|
|
@ -239,5 +240,6 @@ export const TESTS: Partial<Record<string, TestDefinition>> = {
|
|||
"remote-quick-write-rename-before-record":
|
||||
remoteQuickWriteRenameBeforeRecordTest,
|
||||
"self-merge-pending-rename-aliases-second-create":
|
||||
selfMergePendingRenameAliasesSecondCreateTest
|
||||
selfMergePendingRenameAliasesSecondCreateTest,
|
||||
"disable-mid-create-then-delete": disableMidCreateThenDeleteTest
|
||||
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,56 @@
|
|||
import type { AssertableState } from "../utils/assertable-state";
|
||||
import type { TestDefinition } from "../test-definition";
|
||||
|
||||
export const disableMidCreateThenDeleteTest: TestDefinition = {
|
||||
description:
|
||||
"Reproduces a fuzz failure where one client's create-then-delete-then-disable-sync " +
|
||||
"sequence loses the file: the create commits server-side, the response is " +
|
||||
"lost (sync reset), the local file is deleted, then sync is re-enabled. The " +
|
||||
"catch-up replay should redeliver the create so both clients converge to " +
|
||||
"having the file (the delete never reached the server because its docId " +
|
||||
"Promise was rejected when the queue cleared).",
|
||||
clients: 2,
|
||||
steps: [
|
||||
// Client 0 is online (the witness); client 1 starts disabled.
|
||||
{ type: "enable-sync", client: 0 },
|
||||
|
||||
// Client 1 creates the file while offline so the LocalCreate is queued.
|
||||
{ type: "create", client: 1, path: "file-32.md", content: "hello" },
|
||||
|
||||
// Arm the drop so client 1's create POST commits server-side but the
|
||||
// response is replaced with SyncResetError (matches the fuzz scenario
|
||||
// where sync was disabled mid-flight).
|
||||
{ type: "drop-next-create-response", client: 1 },
|
||||
|
||||
// Enable sync on client 1: offline scan picks up file-32, drain fires
|
||||
// POST /documents, server commits, broadcast goes out, response is
|
||||
// dropped on the client. SyncResetError exits the drain leaving the
|
||||
// create event still in the queue.
|
||||
{ type: "enable-sync", client: 1 },
|
||||
{ type: "wait-for-dropped-create-response", client: 1 },
|
||||
|
||||
// The user then deletes the file locally and toggles sync off/on
|
||||
// (the same flow the fuzz harness used). The disable's pause()
|
||||
// does not clear the queue, but the re-enable runs an offline
|
||||
// scan that calls clearPending() — wiping the dangling LocalCreate
|
||||
// and any LocalDelete behind it. The local disk is empty, so
|
||||
// nothing is enqueued.
|
||||
{ type: "delete", client: 1, path: "file-32.md" },
|
||||
{ type: "disable-sync", client: 1 },
|
||||
{ type: "enable-sync", client: 1 },
|
||||
|
||||
// Catch-up on the new WS connection should deliver file-32 (vault
|
||||
// update id 1) since client 1's lastSeenUpdateId is still 0.
|
||||
{ type: "barrier" },
|
||||
|
||||
{
|
||||
type: "assert-consistent",
|
||||
verify: (s: AssertableState): void => {
|
||||
s.assertFileExists("file-32.md").assertContent(
|
||||
"file-32.md",
|
||||
"hello"
|
||||
);
|
||||
}
|
||||
}
|
||||
]
|
||||
};
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
#!/bin/bash
|
||||
|
||||
rm -rf /host/tmp/vaultlink-e2e-databases
|
||||
rm -rf /tmp/vaultlink-e2e-databases
|
||||
rm -rf logs
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ sleep 1
|
|||
|
||||
# Clean databases (uses tmpfs via /dev/shm for zero disk I/O)
|
||||
echo "Cleaning databases..."
|
||||
rm -rf /host/tmp/vaultlink-e2e-databases
|
||||
rm -rf /tmp/databases
|
||||
|
||||
# Start the server in the background
|
||||
echo "Starting server..."
|
||||
|
|
|
|||
|
|
@ -793,23 +793,18 @@ impl Database {
|
|||
.await
|
||||
.context("Failed to commit transaction")?;
|
||||
|
||||
// For non-delete writes the originating device already has
|
||||
// authoritative state from its HTTP response, so we tag the
|
||||
// broadcast with `origin_device_id` and the send task in
|
||||
// `websocket.rs` filters it out for that device. Deletes are
|
||||
// delivered to *every* connected client including the author —
|
||||
// the originator only removes the document from its sync queue
|
||||
// once it receives this receipt.
|
||||
// Broadcast every commit to every connected client, including
|
||||
// the originator. The HTTP response is the originator's normal
|
||||
// path to learn its own update, but if the response is lost
|
||||
// (sync reset, dropped TCP) the broadcast is the only remaining
|
||||
// delivery channel — and the client-side `parentVersionId`
|
||||
// dedup absorbs the redundant message when the response made it
|
||||
// through.
|
||||
let envelope = WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate {
|
||||
document: version.clone().into(),
|
||||
});
|
||||
let with_origin = if version.is_deleted {
|
||||
WebSocketServerMessageWithOrigin::new(envelope)
|
||||
} else {
|
||||
WebSocketServerMessageWithOrigin::with_origin(version.device_id.clone(), envelope)
|
||||
};
|
||||
self.broadcasts
|
||||
.send_document_update(vault_id, with_origin)?;
|
||||
.send_document_update(vault_id, WebSocketServerMessageWithOrigin::new(envelope))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,28 +80,13 @@ pub enum WebSocketServerMessage {
|
|||
CursorPositions(CursorPositionFromServer),
|
||||
}
|
||||
|
||||
/// Broadcast envelope carrying the message plus the device that produced
|
||||
/// it. The per-recipient send task compares `origin_device_id` against
|
||||
/// its own device id to fill in `originates_from_self` before the message
|
||||
/// is serialized on the wire.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WebSocketServerMessageWithOrigin {
|
||||
pub origin_device_id: Option<DeviceId>,
|
||||
pub message: WebSocketServerMessage,
|
||||
}
|
||||
|
||||
impl WebSocketServerMessageWithOrigin {
|
||||
pub fn new(message: WebSocketServerMessage) -> Self {
|
||||
Self {
|
||||
origin_device_id: None,
|
||||
message,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_origin(origin_device_id: DeviceId, message: WebSocketServerMessage) -> Self {
|
||||
Self {
|
||||
origin_device_id: Some(origin_device_id),
|
||||
message,
|
||||
}
|
||||
Self { message }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -208,14 +208,24 @@ async fn websocket(
|
|||
loop {
|
||||
match broadcast_receiver.recv().await {
|
||||
Ok(update) => {
|
||||
// Drop messages this device authored because the HTTP
|
||||
// response already carried authoritative state back.
|
||||
// Delete broadcasts are sent without an origin so the
|
||||
// author also receives them — that's the receipt the
|
||||
// client needs to drop the doc from its sync queue.
|
||||
if Some(&device_id) == update.origin_device_id.as_ref() {
|
||||
continue;
|
||||
}
|
||||
// Always deliver vault updates to the originating
|
||||
// device too. The HTTP response is the *normal* path
|
||||
// for the originator to learn its own update, and
|
||||
// the client-side wire loop dedupes redundant
|
||||
// broadcasts via the `parentVersionId` check. But
|
||||
// when the response is lost mid-flight (sync reset,
|
||||
// pause/resume, dropped TCP) the originator has no
|
||||
// record of the doc; if the broadcast is also
|
||||
// suppressed AND the next handshake's cursor was
|
||||
// captured before the commit (cursor < vuid), the
|
||||
// doc falls through both delivery paths and is
|
||||
// stranded forever on the originator. Letting the
|
||||
// self-broadcast through closes that window — the
|
||||
// message processes as a remote create on the
|
||||
// originator's reconnected WS and the file is
|
||||
// restored. (Cursor messages still get the inner
|
||||
// self-filter below; we drop our own cursor entries
|
||||
// from the `clients` payload.)
|
||||
|
||||
// Filter out vault updates already covered by the
|
||||
// catch-up snapshot. The handshake atomically
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue