Fix deletions
Some checks failed
Check / build (pull_request) Has been cancelled
E2E tests / build (pull_request) Has been cancelled
Publish CLI / publish-docker (pull_request) Has been cancelled
Publish server Docker image / publish-docker (pull_request) Has been cancelled

This commit is contained in:
Andras Schmelczer 2026-05-14 20:58:14 +01:00
parent 935ed9c8e7
commit 36695e9361
4 changed files with 43 additions and 17 deletions

View file

@ -94,6 +94,16 @@ export class SyncEventQueue {
// `clearAllState` / schema-version-mismatch reset.
private readonly _pendingServerDeletes = new Set<DocumentId>();
// DocIds we've seen deleted in this session. `removeDocumentById`
// adds here so that any stale `RemoteChange` for that doc that
// arrives later (e.g. an older vuid buffered in the network-chaos
// jitter pipeline, or a re-enqueue that landed after the delete's
// `purgeRemoteChangesForDocumentId`) is recognised in
// `processRemoteChange` and skipped instead of falling through to
// `processRemoteCreateForNewDocument` and resurrecting the doc
// with pre-delete bytes. Cleared on `clearAllState`.
private readonly _deletedDocumentIds = new Set<DocumentId>();
public constructor(
private readonly settings: Settings,
private readonly logger: Logger,
@ -605,6 +615,15 @@ export class SyncEventQueue {
}
public async removeDocumentById(documentId: DocumentId): Promise<void> {
// Record the tombstone unconditionally: `processRemoteChange`
// checks it to drop late RemoteChanges that would otherwise
// resurrect the doc via `processRemoteCreateForNewDocument`.
// Purging the queue (below) only catches events that are
// already enqueued; events that arrive after this point (e.g.
// a stale broadcast buffered in the network-chaos jitter
// pipeline, or a re-enqueue that lands after this purge) need
// the tombstone to be skipped.
this._deletedDocumentIds.add(documentId);
const record = this.byDocId.get(documentId);
if (record === undefined) {
// Still clear any deletion-pending mark and purge stale
@ -634,6 +653,10 @@ export class SyncEventQueue {
return this.save();
}
/**
 * Report whether `documentId` is currently tombstoned, i.e. whether it
 * is present in `_deletedDocumentIds`.
 *
 * `removeDocumentById` adds the id to that set on every call, and the
 * set is emptied when all queue state is cleared, so a `true` result
 * means the doc was removed since the last such reset. Callers can use
 * this to discard a late remote change for a removed doc instead of
 * treating it as a brand-new document.
 *
 * @param documentId - Id of the document to look up.
 * @returns `true` if the id is in the tombstone set, `false` otherwise.
 */
public hasBeenDeleted(documentId: DocumentId): boolean {
return this._deletedDocumentIds.has(documentId);
}
/**
* Mark a doc as "HTTP DELETE has been acked by the server but the
* WebSocket receipt that would call `removeDocumentById` hasn't arrived
@ -739,6 +762,7 @@ export class SyncEventQueue {
this.byDocId.clear();
this._byLocalPath.clear();
this._pendingServerDeletes.clear();
this._deletedDocumentIds.clear();
this._lastSeenUpdateId.reset();
await this.save();
}

View file

@ -876,6 +876,23 @@ export class Syncer {
return this.processRemoteUpdate(trackedRecord, remoteVersion);
}
// Tombstoned: we removed this doc in this session via
// `removeDocumentById` (either WS delete receipt or PUT response
// with `isDeleted=true`). A late RemoteChange for the same doc
// can still reach us — buffered in the network-chaos jitter
// pipeline, or re-enqueued after the delete's purge — and
// without this gate `processRemoteCreateForNewDocument` would
// happily fetch pre-delete bytes and resurrect the doc, blocking
// any other doc whose `remoteRelativePath` happens to be the
// same slot.
if (this.queue.hasBeenDeleted(remoteVersion.documentId)) {
this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId;
this.logger.debug(
`Discarding stale remote update for tombstoned ${remoteVersion.documentId} at ${remoteVersion.relativePath}`
);
return;
}
return this.processRemoteCreateForNewDocument(remoteVersion);
}

View file

@ -7,7 +7,7 @@ import { randomCasing } from "./utils/random-casing";
import { TimeoutError } from "./utils/with-timeout";
import { TestErrorTracker } from "./utils/test-error-tracker";
const TEST_ITERATIONS = 5;
const TEST_ITERATIONS = 50;
const MAX_INITIAL_DOCS = 10;
// Simulate async file access by injecting waiting time before returning from file operations.

View file

@ -91,25 +91,10 @@ print_failed_log() {
return 1
}
E2E_TIMEOUT=${2:-3600}
start_time=$(date +%s)
echo "Monitoring $process_count processes (timeout: ${E2E_TIMEOUT}s)"
echo "Monitoring $process_count processes"
# Monitor processes
while true; do
# Script-level timeout to prevent indefinite hangs
current_time=$(date +%s)
elapsed=$((current_time - start_time))
if [ $elapsed -ge $E2E_TIMEOUT ]; then
echo "E2E timeout reached (${E2E_TIMEOUT}s). Killing remaining processes."
for pid in "${pids[@]}"; do
if [ -n "$pid" ]; then
kill $pid 2>/dev/null || true
fi
done
exit 1
fi
if print_failed_log; then
# Kill remaining processes
for pid in "${pids[@]}"; do