Remove isNewFile
This commit is contained in:
parent
d8b6ec5b77
commit
ce995cdc33
8 changed files with 18 additions and 61 deletions
|
|
@ -118,7 +118,7 @@ Local FS events from the watcher update `localPath` synchronously at enqueue tim
|
|||
|
||||
**Watermark.** `lastSeenUpdateId` uses a `MinCovered` (a contiguous-prefix tracker over a stream of integers): we only advance the published min when the next consecutive id has been processed, so out-of-order RemoteChange ids don't fool the WebSocket handshake into requesting a too-recent catch-up.
|
||||
|
||||
**Server catch-up.** The server's WS handshake replays events newer than the client's `last_seen_vault_update_id` from the `latest_document_versions` view (one row per doc, the latest). On those replayed rows `is_new_file` means _new to this client_ (`creation_vault_update_id > last_seen_vault_update_id`), not "this row is the doc's first version" — necessary because the catch-up only carries the latest version; if a doc was created and updated past the watermark, the client never sees its create otherwise.
|
||||
**Server catch-up.** The server's WS handshake replays events newer than the client's `last_seen_vault_update_id`, computed as the latest version per document as of the cursor. The catch-up only carries each doc's *latest* version, not its full history. The client treats any RemoteChange whose `documentId` it has no record of as a fresh create and downloads the bytes.
|
||||
|
||||
## Edge-case patterns the sync engine has to survive
|
||||
|
||||
|
|
@ -134,8 +134,6 @@ The two-loop split defuses most of the old race catalogue (slot-collision stashe
|
|||
|
||||
**Watermark advancement is load-bearing both ways.** Branches that _skip_ a remote event without advancing `lastSeenUpdateId` create permanent gaps that re-deliver forever. Branches that _advance_ without applying the content lose data: the server has no further event to re-deliver, the catch-up only carries the latest version, and any state in between is gone. Don't advance unless the event was actually applied (or deliberately discarded after weighing both halves).
|
||||
|
||||
**`isNewFile` semantics differ between catch-up and real-time.** On WS handshake replay it means _new to this client_ (`creation_vault_update_id > last_seen_vault_update_id`); on real-time broadcasts it means _this version is the create_ (`creation_vault_update_id == vault_update_id`). A handler that decides based on one interpretation will be wrong on the other channel; reasoning about fetch-and-treat-as-new vs. ignore needs to know which channel delivered the event.
|
||||
|
||||
**Pause / disable-sync mid-flight** is the one race the new model doesn't structurally fix. An HTTP that committed server-side but whose response was discarded leaves the server holding a doc the client has no record of. Resume → offline scan → server-side dedupe handles it (the server merges the duplicate create into the existing doc), but if the merge produces a deconflict, the client picks up an extra file. Out of scope for the two-loop split.
|
||||
|
||||
**Cycle reconciliation uses in-memory content swap.** When the move graph contains a cycle, the reconciler reads every file in the cycle into memory and writes each back to its new slot, with no tmp files. A write-ahead marker at `.vaultlink/swap-<uuid>.json` lists each leg; on startup the reconciler reads the marker, hashes each `from` to determine which legs ran, and replays the rest. The `.vaultlink/**` glob is hard-coded as an internal ignore pattern so swap markers don't get sync'd.
|
||||
|
|
|
|||
|
|
@ -5,14 +5,12 @@ export const catchupCreateAndUpdateNotSkippedTest: TestDefinition = {
|
|||
description:
|
||||
"Client 1 disconnects (sync disabled). Client 0 creates a doc and " +
|
||||
"then updates it. When Client 1 reconnects, the server's catch-up " +
|
||||
"stream sends only the doc's *latest* version (the update), not the " +
|
||||
"full history. Pre-fix the wire's `is_new_file` was set to " +
|
||||
"`creation == latest_version`, so the catch-up flagged the doc as " +
|
||||
"non-new even though Client 1 had never seen its creation. Client " +
|
||||
"1's `processRemoteChange` then dropped it as a 'stale RemoteChange " +
|
||||
"for untracked, non-new document' and the doc was silently lost. " +
|
||||
"Post-fix `is_new_file` in the catch-up stream means 'new relative " +
|
||||
"to the recipient's watermark' (`creation > last_seen_vault_update_id`).",
|
||||
"stream sends only the doc's *latest* version (the update), not " +
|
||||
"the full history. Client 1 must still pick up the doc — any handler " +
|
||||
"that gates the create-on-untracked path on a server-supplied " +
|
||||
"'is this the first version' flag would drop it (the latest version " +
|
||||
"is not the create), silently leaking the doc. The client treats " +
|
||||
"every untracked-doc RemoteChange as a fresh create.",
|
||||
clients: 2,
|
||||
steps: [
|
||||
{ type: "enable-sync", client: 0 },
|
||||
|
|
@ -36,7 +34,7 @@ export const catchupCreateAndUpdateNotSkippedTest: TestDefinition = {
|
|||
|
||||
// Client 0 updates the doc (vault_update_id v_X > v_C). The
|
||||
// server's `latest_document_versions` view now returns the
|
||||
// *update* row — its `creation_vault_update_id != vault_update_id`.
|
||||
// *update* row — the create row is no longer the latest.
|
||||
{
|
||||
type: "update",
|
||||
client: 0,
|
||||
|
|
@ -46,10 +44,9 @@ export const catchupCreateAndUpdateNotSkippedTest: TestDefinition = {
|
|||
{ type: "sync", client: 0 },
|
||||
|
||||
// Client 1 reconnects. Server's catch-up replays docs with
|
||||
// `vault_update_id > last_seen`. For doc.md it sends v_X with
|
||||
// `is_new_file` derived from `creation_vault_update_id >
|
||||
// last_seen_vault_update_id` (post-fix) — so Client 1 treats it
|
||||
// as a fresh create and downloads the latest content.
|
||||
// `vault_update_id > last_seen`. For doc.md it sends v_X; Client
|
||||
// 1 has no record of the doc, so it treats the RemoteChange as a
|
||||
// fresh create and downloads the latest content.
|
||||
{ type: "enable-sync", client: 1 },
|
||||
{ type: "barrier" },
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,4 @@ export interface DocumentVersionWithoutContent {
|
|||
userId: string;
|
||||
deviceId: string;
|
||||
contentSize: number;
|
||||
/**
|
||||
* True iff this is the first version of the document
|
||||
*/
|
||||
isNewFile: boolean;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ function fakeRemoteVersion(
|
|||
userId: "user",
|
||||
deviceId: "device",
|
||||
contentSize: 100,
|
||||
isNewFile: true,
|
||||
...overrides
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -618,9 +618,8 @@ export class SyncEventQueue {
|
|||
// in the queue ahead of it. Once those drain and the doc is
|
||||
// removed, a still-pending RemoteChange for an earlier version
|
||||
// would be processed by `processRemoteCreateForNewDocument` (the
|
||||
// doc is now untracked, and catch-up's `isNewFile=true` semantics
|
||||
// qualify it as a fresh create), resurrecting the doc on disk
|
||||
// with stale bytes that disagree with every other agent.
|
||||
// doc is now untracked), resurrecting the doc on disk with stale
|
||||
// bytes that disagree with every other agent.
|
||||
this.purgeRemoteChangesForDocumentId(documentId);
|
||||
return this.save();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -703,8 +703,7 @@ export class Syncer {
|
|||
if (response.isDeleted) {
|
||||
await this.processRemoteDelete(record.localPath, {
|
||||
...response,
|
||||
contentSize: 0,
|
||||
isNewFile: false
|
||||
contentSize: 0
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
|
@ -859,14 +858,6 @@ export class Syncer {
|
|||
return this.processRemoteUpdate(trackedRecord, remoteVersion);
|
||||
}
|
||||
|
||||
if (!remoteVersion.isNewFile) {
|
||||
this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId;
|
||||
this.logger.debug(
|
||||
`Ignoring stale RemoteChange for untracked, non-new document ${remoteVersion.documentId}`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
return this.processRemoteCreateForNewDocument(remoteVersion);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -405,7 +405,6 @@ impl Database {
|
|||
r#"
|
||||
select
|
||||
vault_update_id,
|
||||
creation_vault_update_id,
|
||||
document_id as "document_id: Hyphenated",
|
||||
relative_path,
|
||||
updated_date as "updated_date: chrono::DateTime<Utc>",
|
||||
|
|
@ -439,7 +438,6 @@ impl Database {
|
|||
user_id: row.user_id,
|
||||
device_id: row.device_id,
|
||||
content_size: row.content_size.unwrap_or(0),
|
||||
is_new_file: row.creation_vault_update_id == row.vault_update_id,
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
|
|
@ -466,19 +464,14 @@ impl Database {
|
|||
// cursor capture (under broadcast send-lock) and this query
|
||||
// (which runs after drop-lock) would expose a `vault_update_id
|
||||
// > cursor` row that the cursor filter then drops, removing
|
||||
// the doc from the catch-up entirely. The post-cursor live
|
||||
// broadcast then carries `is_new_file = false` (per real-time
|
||||
// semantics it's an update of a previously-existing version),
|
||||
// and the receiving client — which has no record of the doc —
|
||||
// ignores it as stale, stranding the doc forever. Computing
|
||||
// the snapshot from the documents table directly with the
|
||||
// upper bound applied at the GROUP BY layer keeps the
|
||||
// catch-up self-contained at exactly the cursor.
|
||||
// the doc from the catch-up entirely. Computing the snapshot
|
||||
// from the documents table directly with the upper bound
|
||||
// applied at the GROUP BY layer keeps the catch-up
|
||||
// self-contained at exactly the cursor.
|
||||
let query = sqlx::query!(
|
||||
r#"
|
||||
select
|
||||
d.vault_update_id,
|
||||
d.creation_vault_update_id,
|
||||
d.document_id as "document_id: Hyphenated",
|
||||
d.relative_path,
|
||||
d.updated_date as "updated_date: chrono::DateTime<Utc>",
|
||||
|
|
@ -523,17 +516,6 @@ impl Database {
|
|||
user_id: row.user_id,
|
||||
device_id: row.device_id,
|
||||
content_size: row.content_size.unwrap_or(0),
|
||||
// For catch-up streams, "new file" means "new to this
|
||||
// recipient" — the doc was created past the recipient's
|
||||
// watermark. The catch-up only carries the doc's
|
||||
// *latest* version (not its full history), so using
|
||||
// `creation == latest` instead would mis-flag every
|
||||
// doc that was created and then updated before the
|
||||
// client reconnected, and the client's
|
||||
// `processRemoteChange` would drop it as "stale
|
||||
// RemoteChange for untracked, non-new document",
|
||||
// silently leaking docs to clients catching up.
|
||||
is_new_file: row.creation_vault_update_id > vault_update_id,
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
|
|
|
|||
|
|
@ -46,14 +46,10 @@ pub struct DocumentVersionWithoutContent {
|
|||
|
||||
#[ts(type = "number")]
|
||||
pub content_size: u64,
|
||||
|
||||
/// True iff this is the first version of the document
|
||||
pub is_new_file: bool,
|
||||
}
|
||||
|
||||
impl From<StoredDocumentVersion> for DocumentVersionWithoutContent {
|
||||
fn from(value: StoredDocumentVersion) -> Self {
|
||||
let is_new_file = value.creation_vault_update_id == value.vault_update_id;
|
||||
Self {
|
||||
vault_update_id: value.vault_update_id,
|
||||
document_id: value.document_id,
|
||||
|
|
@ -63,7 +59,6 @@ impl From<StoredDocumentVersion> for DocumentVersionWithoutContent {
|
|||
user_id: value.user_id,
|
||||
device_id: value.device_id,
|
||||
content_size: value.content.len() as u64,
|
||||
is_new_file,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue