diff --git a/frontend/sync-client/package.json b/frontend/sync-client/package.json index aa369fa7..45c33764 100644 --- a/frontend/sync-client/package.json +++ b/frontend/sync-client/package.json @@ -14,19 +14,17 @@ }, "devDependencies": { "byte-base64": "^1.1.0", - "minimatch": "^10.0.1", - "p-queue": "^8.1.0", + "minimatch": "^10.1.1", + "p-queue": "^9.0.1", "reconcile-text": "^0.8.0", - "uuid": "^13.0.0", - "@types/node": "^24.8.1", - "ts-loader": "^9.5.2", + "@types/node": "^25.0.2", + "ts-loader": "^9.5.4", "tslib": "2.8.1", - "tsx": "^4.20.6", - "typescript": "5.8.3", - "webpack": "^5.99.9", + "tsx": "^4.21.0", + "typescript": "5.9.3", + "webpack": "^5.103.0", "webpack-cli": "^6.0.1", "webpack-merge": "^6.0.1", - "@sentry/browser": "^10.8.0", - "ws": "^8.18.3" + "@sentry/browser": "^10.30.0" } } diff --git a/frontend/sync-client/src/index.ts b/frontend/sync-client/src/index.ts index cfcc5071..f06523a6 100644 --- a/frontend/sync-client/src/index.ts +++ b/frontend/sync-client/src/index.ts @@ -2,6 +2,7 @@ import { awaitAll } from "./utils/await-all"; import { logToConsole } from "./utils/debugging/log-to-console"; import { slowFetchFactory } from "./utils/debugging/slow-fetch-factory"; import { slowWebSocketFactory } from "./utils/debugging/slow-web-socket-factory"; +import { InMemoryFileSystem } from "./utils/debugging/in-memory-file-system"; import { getRandomColor } from "./utils/get-random-color"; import { lineAndColumnToPosition } from "./utils/line-and-column-to-position"; import { positionToLineAndColumn } from "./utils/position-to-line-and-column"; @@ -21,14 +22,19 @@ export { export { Logger, LogLevel, LogLine } from "./tracing/logger"; export { type SyncSettings, DEFAULT_SETTINGS } from "./persistence/settings"; export { rateLimit } from "./utils/rate-limit"; -export type { RelativePath, StoredDatabase } from "./persistence/database"; +export type { + RelativePath, + StoredSyncState as StoredDatabase, + DocumentRecord +} from "./sync-operations/types"; 
export type { FileSystemOperations } from "./file-operations/filesystem-operations"; export type { PersistenceProvider } from "./persistence/persistence"; export type { CursorSpan } from "./services/types/CursorSpan"; export type { ClientCursors } from "./services/types/ClientCursors"; export type { NetworkConnectionStatus } from "./types/network-connection-status"; -export type { ServerVersionMismatchError } from "./services/server-version-mismatch-error"; -export type { AuthenticationError } from "./services/authentication-error"; +export type { ServerVersionMismatchError } from "./errors/server-version-mismatch-error"; +export type { AuthenticationError } from "./errors/authentication-error"; +export { SyncResetError } from "./errors/sync-reset-error"; export type { MaybeOutdatedClientCursors } from "./types/maybe-outdated-client-cursors"; export { DocumentSyncStatus } from "./types/document-sync-status"; export { SyncClient } from "./sync-client"; @@ -37,7 +43,8 @@ export type { TextWithCursors, CursorPosition } from "reconcile-text"; export const debugging = { slowFetchFactory, slowWebSocketFactory, - logToConsole + logToConsole, + InMemoryFileSystem }; export const utils = { diff --git a/frontend/sync-client/src/sync-client.ts b/frontend/sync-client/src/sync-client.ts index 2a272c86..dd537296 100644 --- a/frontend/sync-client/src/sync-client.ts +++ b/frontend/sync-client/src/sync-client.ts @@ -2,8 +2,12 @@ import type { PersistenceProvider } from "./persistence/persistence"; import type { HistoryEntry, HistoryStats } from "./tracing/sync-history"; import { SyncHistory } from "./tracing/sync-history"; import { Logger, LogLevel, LogLine } from "./tracing/logger"; -import type { RelativePath, StoredDatabase } from "./persistence/database"; -import { Database } from "./persistence/database"; +import type { + DocumentId, + RelativePath, + StoredSyncState +} from "./sync-operations/types"; +import { SyncEventQueue } from "./sync-operations/sync-event-queue"; import 
* as Sentry from "@sentry/browser"; import type { SyncSettings } from "./persistence/settings"; import { DEFAULT_SETTINGS, Settings } from "./persistence/settings"; @@ -12,7 +16,6 @@ import { Syncer } from "./sync-operations/syncer"; import type { FileSystemOperations } from "./file-operations/filesystem-operations"; import { FileOperations } from "./file-operations/file-operations"; import { FetchController } from "./services/fetch-controller"; -import { UnrestrictedSyncer } from "./sync-operations/unrestricted-syncer"; import { rateLimit } from "./utils/rate-limit"; import type { NetworkConnectionStatus } from "./types/network-connection-status"; import { DocumentSyncStatus } from "./types/document-sync-status"; @@ -24,42 +27,46 @@ import type { MaybeOutdatedClientCursors } from "./types/maybe-outdated-client-c import { FileChangeNotifier } from "./sync-operations/file-change-notifier"; import { FixedSizeDocumentCache } from "./utils/data-structures/fix-sized-cache"; import { setUpTelemetry } from "./utils/set-up-telemetry"; -import { DIFF_CACHE_SIZE_MB } from "./consts"; import { ServerConfig } from "./services/server-config"; import type { EventListeners } from "./utils/data-structures/event-listeners"; +import { Lock } from "./utils/data-structures/locks"; +import { ExpectedFsEvents } from "./sync-operations/expected-fs-events"; export class SyncClient { - private hasStartedOfflineSync = false; private hasFinishedOfflineSync = false; private hasStarted = false; private hasBeenDestroyed = false; private unloadTelemetry?: () => void; private isDestroying = false; private readonly eventUnsubscribers: (() => void)[] = []; + private readonly settingsChangeLock = new Lock( + "SyncClient.onSettingsChange" + ); private constructor( + public readonly logger: Logger, private readonly history: SyncHistory, private readonly settings: Settings, - private readonly database: Database, + private readonly syncEventQueue: SyncEventQueue, private readonly syncer: Syncer, private 
readonly webSocketManager: WebSocketManager, - public readonly logger: Logger, private readonly fetchController: FetchController, private readonly cursorTracker: CursorTracker, private readonly fileChangeNotifier: FileChangeNotifier, private readonly contentCache: FixedSizeDocumentCache, - private readonly fileOperations: FileOperations, private readonly serverConfig: ServerConfig, + private readonly syncService: SyncService, + private readonly expectedFsEvents: ExpectedFsEvents, private readonly persistence: PersistenceProvider< Partial<{ settings: Partial; - database: Partial; + database: Partial; }> > ) {} - public get documentCount(): number { - return this.database.length; + public get syncedDocumentCount(): number { + return this.syncEventQueue.syncedDocumentCount; } public get isWebSocketConnected(): boolean { @@ -73,6 +80,27 @@ export class SyncClient { return this.history.onHistoryUpdated; } + /** + * Fires whenever a tracked document's local file moves on disk — + * watcher-driven user renames, post-create deconflicts placed by + * the reconciler, lost-rename replays in offline scan, slot + * displacements when another record claims a path. Both + * `oldPath` and `newPath` may be `undefined` (placement-pending + * state). Useful for callers that mirror disk-side path state + * — e.g. test harnesses tracking which paths are safe to mutate + * — and need a signal beyond the user-facing history. 
+ */ + public get onDocumentPathChanged(): EventListeners< + ( + documentId: DocumentId, + oldPath: RelativePath | undefined, + newPath: RelativePath | undefined + ) => unknown + > { + this.checkIfDestroyed("onDocumentPathChanged getter"); + return this.syncEventQueue.onDocumentPathChanged; + } + public get onSettingsChanged(): EventListeners< (newSettings: SyncSettings, oldSettings: SyncSettings) => unknown > { @@ -101,6 +129,13 @@ export class SyncClient { return this.cursorTracker.onRemoteCursorsUpdated; } + public get hasPendingWork(): boolean { + return ( + this.syncEventQueue.pendingUpdateCount > 0 || + this.webSocketManager.hasOutstandingWork + ); + } + public static async create({ fs, persistence, @@ -112,7 +147,8 @@ export class SyncClient { persistence: PersistenceProvider< Partial<{ settings: Partial; - database: Partial; + database: Partial; + deviceId: string; }> >; fetch?: typeof globalThis.fetch; @@ -121,39 +157,46 @@ export class SyncClient { }): Promise { const logger = new Logger(); - const deviceId = createClientId(); - - logger.info(`Creating SyncClient with client id ${deviceId}`); - const history = new SyncHistory(logger); let state = (await persistence.load()) ?? { settings: undefined, - database: undefined + database: undefined, + deviceId: undefined }; + // Persist deviceId across destroy + init so the server's + // lost-create dedup (which scopes by device_id) can recognise + // a retry as belonging to the same client. Without this, + // every fresh `SyncClient` after a destroy would generate a + // new deviceId, the server-side query would miss, and the + // pending-but-lost create would deconflict instead of + // binding to the doc its content was already absorbed into. 
+ let deviceId = state.deviceId; + if (deviceId === undefined) { + deviceId = createClientId(); + state = { ...state, deviceId }; + await persistence.save(state); + } + + logger.info(`Creating SyncClient with client id ${deviceId}`); + const settings = new Settings( logger, state.settings, async (data): Promise => { state = { ...state, settings: data }; - // we're not rate-limiting settings saves as (1) we need to initialise the settings to know the rate limit - // and (2) settings changes are infrequent enough that rate-limiting is not necessary await persistence.save(state); } ); - const rateLimitedSave = rateLimit( - persistence.save, - () => settings.getSettings().minimumSaveIntervalMs - ); - - const database = new Database( + const syncEventQueue = new SyncEventQueue( + settings, logger, state.database, async (data): Promise => { state = { ...state, database: data }; - await rateLimitedSave(state); + await persistence.save(state); } ); @@ -170,32 +213,23 @@ export class SyncClient { fetch ); - const serverConfig = new ServerConfig(syncService); + const serverConfig = new ServerConfig(syncService, settings); + + const expectedFsEvents = new ExpectedFsEvents(); const fileOperations = new FileOperations( logger, - database, fs, serverConfig, + expectedFsEvents, nativeLineEndings ); const contentCache = new FixedSizeDocumentCache( - 1024 * 1024 * DIFF_CACHE_SIZE_MB - ); - const unrestrictedSyncer = new UnrestrictedSyncer( - logger, - database, - settings, - syncService, - fileOperations, - history, - contentCache, - serverConfig + 1024 * 1024 * settings.getSettings().diffCacheSizeMB ); const webSocketManager = new WebSocketManager( - deviceId, logger, settings, webSocket @@ -204,34 +238,38 @@ export class SyncClient { const syncer = new Syncer( deviceId, logger, - database, settings, - syncService, webSocketManager, fileOperations, - unrestrictedSyncer + syncService, + history, + contentCache, + serverConfig, + syncEventQueue ); const fileChangeNotifier = new 
FileChangeNotifier(); const cursorTracker = new CursorTracker( - database, + logger, + syncEventQueue, webSocketManager, fileOperations, fileChangeNotifier ); const client = new SyncClient( + logger, history, settings, - database, + syncEventQueue, syncer, webSocketManager, - logger, fetchController, cursorTracker, fileChangeNotifier, contentCache, - fileOperations, serverConfig, + syncService, + expectedFsEvents, persistence ); @@ -285,10 +323,10 @@ export class SyncClient { } /** - * Reload settings from disk overriding current in-memory settings. - * Missing values will be filled in from DEFAULT_SETTINGS rather than - * retaining current in-memory settings. - */ + * Reload settings from disk overriding current in-memory settings. + * Missing values will be filled in from DEFAULT_SETTINGS rather than + * retaining current in-memory settings. + */ public async reloadSettings(): Promise { this.checkIfDestroyed("reloadSettings"); @@ -320,10 +358,10 @@ export class SyncClient { } /** - * Wait for the in-flight operations to finish, reset all tracking, - * and the local database but retain the settings. - * The SyncClient can be used again after calling this method. - */ + * Wait for the in-flight operations to finish, reset all tracking, + * and the local state but retain the settings. + * The SyncClient can be used again after calling this method. 
+ */ public async reset(): Promise { this.checkIfDestroyed("reset"); @@ -332,16 +370,16 @@ export class SyncClient { ); await this.pause(); - // clear all local state this.logger.info("Resetting SyncClient's local state"); - this.database.reset(); - await this.database.save(); // ensure the new database reads as empty + await this.syncEventQueue.clearAllState(); + await this.syncEventQueue.save(); this.resetInMemoryState(); - this.hasStartedOfflineSync = false; this.hasFinishedOfflineSync = false; this.serverConfig.reset(); - await this.startSyncing(); + if (this.settings.getSettings().isSyncEnabled) { + await this.startSyncing(); + } } public getSettings(): SyncSettings { @@ -363,40 +401,48 @@ export class SyncClient { await this.settings.setSettings(value); } - public async syncLocallyCreatedFile( - relativePath: RelativePath - ): Promise { + public syncLocallyCreatedFile(relativePath: RelativePath): void { this.checkIfDestroyed("syncLocallyCreatedFile"); - this.fileChangeNotifier.notifyOfFileChange(relativePath); - return this.syncer.syncLocallyCreatedFile(relativePath); + this.fileChangeNotifier.notifyOfFileChange(relativePath); // this is for updating cursors + if (this.expectedFsEvents.matchCreate(relativePath)) { + return; + } + + this.syncer.syncLocallyCreatedFile(relativePath); } - public async syncLocallyDeletedFile( - relativePath: RelativePath - ): Promise { - this.checkIfDestroyed("syncLocallyDeletedFile"); - - this.fileChangeNotifier.notifyOfFileChange(relativePath); - return this.syncer.syncLocallyDeletedFile(relativePath); - } - - public async syncLocallyUpdatedFile({ + public syncLocallyUpdatedFile({ oldPath, relativePath }: { oldPath?: RelativePath; relativePath: RelativePath; - }): Promise { + }): void { this.checkIfDestroyed("syncLocallyUpdatedFile"); - this.fileChangeNotifier.notifyOfFileChange(relativePath); - return this.syncer.syncLocallyUpdatedFile({ + this.fileChangeNotifier.notifyOfFileChange(relativePath); // this is for updating cursors 
+ if (this.expectedFsEvents.matchUpdate(relativePath, oldPath)) { + return; + } + + this.syncer.syncLocallyUpdatedFile({ oldPath, relativePath }); } + public syncLocallyDeletedFile(relativePath: RelativePath): void { + this.checkIfDestroyed("syncLocallyDeletedFile"); + + this.fileChangeNotifier.notifyOfFileChange(relativePath); // this is for updating cursors + if (this.expectedFsEvents.matchDelete(relativePath)) { + return; + } + + this.syncer.syncLocallyDeletedFile(relativePath); + } + public getDocumentSyncingStatus( relativePath: RelativePath ): DocumentSyncStatus { @@ -406,16 +452,11 @@ export class SyncClient { return DocumentSyncStatus.SYNCING_IS_DISABLED; } - if (!this.syncer.isFirstSyncComplete || !this.hasFinishedOfflineSync) { + if (!this.hasFinishedOfflineSync) { return DocumentSyncStatus.SYNCING; } - const document = - this.database.getLatestDocumentByRelativePath(relativePath); - if (document === undefined) { - return DocumentSyncStatus.SYNCING; - } - return document.updates.length > 0 + return this.syncEventQueue.hasPendingEventsForPath(relativePath) ? DocumentSyncStatus.SYNCING : DocumentSyncStatus.UP_TO_DATE; } @@ -429,20 +470,20 @@ export class SyncClient { } public async waitUntilFinished(): Promise { - this.checkIfDestroyed("waitUntilIdle"); - await this.syncer.waitUntilFinished(); - await this.webSocketManager.waitUntilFinished(); - await this.database.save(); // flush all changes to disk + this.checkIfDestroyed("waitUntilFinished"); + await this.waitUntilFinishedInternal(); } /** - * Completely destroy the SyncClient, cancelling all in-progress operations. - * After calling this method, the SyncClient cannot be used again. - */ + * Completely destroy the SyncClient, cancelling all in-progress operations. + * After calling this method, the SyncClient cannot be used again. 
+ */ public async destroy(): Promise { - this.checkIfDestroyed("destroy"); - - // Prevent concurrent destroy calls + if (this.hasBeenDestroyed) { + throw new Error( + "SyncClient has been destroyed and can no longer be used; called from destroy" + ); + } if (this.isDestroying) { this.logger.warn( "destroy() called while already destroying, ignoring" @@ -451,52 +492,92 @@ export class SyncClient { } this.isDestroying = true; - // cancel everything that's in progress - await this.pause(); + // Run cleanup in `finally` so a thrown pause() — or anything else + // mid-shutdown — still leaves the client in the disposed state + // instead of bricked with subscribers/telemetry hanging on. + try { + await this.pause(); + } finally { + this.hasBeenDestroyed = true; - this.hasBeenDestroyed = true; + this.resetInMemoryState(); - this.resetInMemoryState(); + this.eventUnsubscribers.forEach((unsubscribe) => { + unsubscribe(); + }); + this.eventUnsubscribers.length = 0; - // Clean up event listeners to prevent memory leaks - this.eventUnsubscribers.forEach((unsubscribe) => { - unsubscribe(); - }); - this.eventUnsubscribers.length = 0; + this.logger.info("SyncClient has been successfully disposed"); - this.logger.info("SyncClient has been successfully disposed"); + this.unloadTelemetry?.(); + } + } - this.unloadTelemetry?.(); + /** + * The actual drain — separated from `waitUntilFinished` so internal + * shutdown paths (`pause` / `destroy`) can wait for in-flight work + * without tripping the public `checkIfDestroyed` guard, which exists + * only to keep external callers from continuing to use a disposed + * client. + * + * Loops because a WebSocket message handler completing is what enqueues + * a `RemoteChange` into the syncer; if we awaited the syncer first and + * the WS handler second, a message arriving mid-wait would leave a fresh + * drain pending while `save()` ran. Each iteration waits for both, then + * re-checks; we exit only once both report idle in the same pass. 
+ */ + private async waitUntilFinishedInternal(): Promise { + while ( + this.webSocketManager.hasOutstandingWork || + this.syncer.hasPendingWork + ) { + await this.webSocketManager.waitUntilFinished(); + await this.syncer.waitUntilFinished(); + } + await this.syncEventQueue.save(); } private async startSyncing(): Promise { this.checkIfDestroyed("startSyncing"); this.fetchController.finishReset(); + // Undo any earlier `pause()` stop so retryForever keeps retrying. + this.syncService.resume(); - await this.serverConfig.initialize(); + await this.serverConfig.getConfig(); + + await this.syncer.scheduleSyncForOfflineChanges(); + this.syncer.resumeDraining(); this.webSocketManager.start(); - if (!this.hasStartedOfflineSync) { - this.hasStartedOfflineSync = true; - await this.syncer.scheduleSyncForOfflineChanges(); - } - this.hasFinishedOfflineSync = true; } private async pause(): Promise { + this.hasFinishedOfflineSync = false; + this.syncer.pauseDraining(); this.fetchController.startReset(); + // Signal the service so any `retryForever` loop exits at its next + // iteration instead of continuing to retry a network request while + // the rest of the client is winding down. + this.syncService.stop(); await this.webSocketManager.stop(); - await this.waitUntilFinished(); + await this.waitUntilFinishedInternal(); + // Clear the offline-scan gate so a subsequent `startSyncing()` + // re-runs the scan; otherwise any local changes made while sync was + // paused (offline edits, deletes, renames) wouldn't be detected, and + // an incoming remote update would silently overwrite them. + this.syncer.clearOfflineScanGate(); + // Drop any expected fs events that were registered but never matched + // (e.g. an op aborted by SyncResetError). Otherwise a real user edit + // at the same path after re-enable would be swallowed. 
+ this.expectedFsEvents.clear(); } private resetInMemoryState(): void { this.history.reset(); this.contentCache.reset(); - // don't reset the logger this.cursorTracker.reset(); this.syncer.reset(); - this.fileOperations.reset(); } private async onSettingsChange( @@ -505,36 +586,55 @@ export class SyncClient { ): Promise { this.checkIfDestroyed("onSettingsChange"); - if ( - newSettings.vaultName !== oldSettings.vaultName || - newSettings.remoteUri !== oldSettings.remoteUri - ) { - await this.reset(); - } - - if (newSettings.isSyncEnabled !== oldSettings.isSyncEnabled) { - if (newSettings.isSyncEnabled) { - await this.startSyncing(); - } else { - await this.pause(); + // Serialize listener invocations so back-to-back settings updates + // can't run reset()/pause()/startSyncing() concurrently. + await this.settingsChangeLock.withLock(async () => { + // The lock is FIFO, so by the time we run the client may have + // been destroyed in a queued invocation ahead of us. + if (this.hasBeenDestroyed) { + return; } - } - if (newSettings.diffCacheSizeMB !== oldSettings.diffCacheSizeMB) { - this.contentCache.resize(newSettings.diffCacheSizeMB * 1024 * 1024); - } + const connectionChanged = + newSettings.vaultName !== oldSettings.vaultName || + newSettings.remoteUri !== oldSettings.remoteUri; - if (newSettings.enableTelemetry !== oldSettings.enableTelemetry) { - if (newSettings.enableTelemetry) { - this.unloadTelemetry = setUpTelemetry(); - } else { - this.unloadTelemetry?.(); + if (connectionChanged) { + // reset() pauses, clears state, then starts iff isSyncEnabled + // — so any concurrent isSyncEnabled change is already applied. 
+ await this.reset(); + } else if ( + newSettings.isSyncEnabled !== oldSettings.isSyncEnabled + ) { + if (newSettings.isSyncEnabled) { + await this.startSyncing(); + } else { + await this.pause(); + } } - } + + if (newSettings.diffCacheSizeMB !== oldSettings.diffCacheSizeMB) { + this.contentCache.resize( + newSettings.diffCacheSizeMB * 1024 * 1024 + ); + } + + if (newSettings.enableTelemetry !== oldSettings.enableTelemetry) { + if (newSettings.enableTelemetry) { + this.unloadTelemetry = setUpTelemetry(); + } else { + this.unloadTelemetry?.(); + } + } + }); } private checkIfDestroyed(origin: string): void { - if (this.hasBeenDestroyed) { + // Reject new public-API entries the moment destroy() is called, + // not after `pause()` returns. Otherwise an external caller could + // pass the guard and start mutating state while destroy() is + // tearing down the websocket / clearing caches. + if (this.hasBeenDestroyed || this.isDestroying) { throw new Error( `SyncClient has been destroyed and can no longer be used; called from ${origin}` ); diff --git a/frontend/sync-client/src/sync-operations/cursor-tracker.ts b/frontend/sync-client/src/sync-operations/cursor-tracker.ts index bdd7d9b7..c31721b1 100644 --- a/frontend/sync-client/src/sync-operations/cursor-tracker.ts +++ b/frontend/sync-client/src/sync-operations/cursor-tracker.ts @@ -1,5 +1,6 @@ import type { FileOperations } from "../file-operations/file-operations"; -import type { Database, RelativePath } from "../persistence/database"; +import type { RelativePath } from "./types"; +import type { SyncEventQueue } from "./sync-event-queue"; import type { ClientCursors } from "../services/types/ClientCursors"; import type { CursorSpan } from "../services/types/CursorSpan"; import type { DocumentWithCursors } from "../services/types/DocumentWithCursors"; @@ -10,6 +11,7 @@ import { hash } from "../utils/hash"; import type { FileChangeNotifier } from "./file-change-notifier"; import { Lock } from 
"../utils/data-structures/locks"; import { EventListeners } from "../utils/data-structures/event-listeners"; +import type { Logger } from "../tracing/logger"; // Cursor positions are updated separately from documents. However, a given cursor position is only // valid within a certain version of the document it belongs to. This class tracks previous and the latest @@ -22,22 +24,29 @@ export class CursorTracker { (cursors: MaybeOutdatedClientCursors[]) => unknown >(); - private readonly updateLock = new Lock(); + private readonly updateLock: Lock; private knownRemoteCursors: (ClientCursors & { upToDateness: DocumentUpToDateness; })[] = []; - private lastLocalCursorState: DocumentWithCursors[] = []; - private lastLocalCursorStateWithoutDirtyDocuments: DocumentWithCursors[] = - []; + // Cache the previously sent state as a JSON string rather than as the + // array. We mutate `documentsWithCursors` in-place after the cache check + // (setting `vaultUpdateId = null` for dirty docs); storing the array would + // alias and the next call's equality check would compare against + // post-mutation state. 
+ private lastLocalCursorStateJson = "[]"; + private lastLocalCursorStateWithoutDirtyDocumentsJson = "[]"; public constructor( - private readonly database: Database, + logger: Logger, + private readonly queue: SyncEventQueue, private readonly webSocketManager: WebSocketManager, private readonly fileOperations: FileOperations, private readonly fileChangeNotifier: FileChangeNotifier ) { + this.updateLock = new Lock(CursorTracker.name, logger); + this.webSocketManager.onRemoteCursorsUpdateReceived.add( async (clientCursors) => { await this.updateLock.withLock(async () => { @@ -53,7 +62,7 @@ export class CursorTracker { for (const cursor of clientCursors.filter((client) => client.documentsWithCursors.every( - (doc) => doc.vault_update_id != null + (doc) => doc.vaultUpdateId != null ) )) { updatedKnownRemoteCursors.push({ @@ -77,14 +86,20 @@ export class CursorTracker { for (const clientCursor of this.knownRemoteCursors) { if ( clientCursor.documentsWithCursors.some( - (document) => - document.relative_path === relativePath + (document) => document.relativePath === relativePath ) ) { clientCursor.upToDateness = await this.getDocumentsUpToDateness(clientCursor); } } + // Drop the local-cursor send-cache so the next call re-reads + // the file. The first cache key is the editor's input, which + // doesn't change when the file content does — without this, + // a remote update flipping the file from dirty back to clean + // would never re-send the cursor with a fresh `vaultUpdateId`. + this.lastLocalCursorStateJson = ""; + this.lastLocalCursorStateWithoutDirtyDocumentsJson = ""; }) ); } @@ -95,70 +110,67 @@ export class CursorTracker { public async sendLocalCursorsToServer( documentToCursors: Record ): Promise { - const documentsWithCursors: DocumentWithCursors[] = []; + // Serialise concurrent senders so they don't interleave on the + // disk reads + state mutations and emit out-of-order cursor messages. 
+ await this.updateLock.withLock(async () => { + const documentsWithCursors: DocumentWithCursors[] = []; - for (const [relativePath, cursors] of Object.entries( - documentToCursors - )) { - const record = - this.database.getLatestDocumentByRelativePath(relativePath); + for (const [relativePath, cursors] of Object.entries( + documentToCursors + )) { + const record = this.queue.getRecordByLocalPath(relativePath); - if (!record) { - continue; // Let's wait for the file to be created before sending cursors + if (!record) { + continue; // Let's wait for the file to be created before sending cursors + } + + documentsWithCursors.push({ + relativePath: relativePath, + documentId: record.documentId, + vaultUpdateId: record.parentVersionId, + cursors: cursors.map(({ start, end }) => ({ + start: Math.min(start, end), + end: Math.max(start, end) + })) // the client might send directional selections + }); } - if (!record.metadata) { - continue; // this is a new document, no need to sync the cursors + const beforeJson = JSON.stringify(documentsWithCursors); + if (this.lastLocalCursorStateJson === beforeJson) { + // Caching step to avoid reading the edited files all the time + return; + } + this.lastLocalCursorStateJson = beforeJson; + + for (const doc of documentsWithCursors) { + const readContent = await this.fileOperations.read( + doc.relativePath + ); + const record = this.queue.getRecordByLocalPath( + doc.relativePath + ); + if (record?.remoteHash !== (await hash(readContent))) { + doc.vaultUpdateId = null; + } } - documentsWithCursors.push({ - relative_path: relativePath, - document_id: record.documentId, - vault_update_id: record.metadata.parentVersionId, - cursors: cursors.map(({ start, end }) => ({ - start: Math.min(start, end), - end: Math.max(start, end) - })) // the client might send directional selections - }); - } - - if ( - JSON.stringify(this.lastLocalCursorState) === - JSON.stringify(documentsWithCursors) - ) { - // Caching step to avoid reading the edited files 
all the time - return; - } - this.lastLocalCursorState = documentsWithCursors; - - for (const doc of documentsWithCursors) { - const readContent = await this.fileOperations.read( - doc.relative_path - ); - const record = this.database.getLatestDocumentByRelativePath( - doc.relative_path - ); - if (record?.metadata?.hash !== hash(readContent)) { - doc.vault_update_id = null; + const afterJson = JSON.stringify(documentsWithCursors); + if ( + this.lastLocalCursorStateWithoutDirtyDocumentsJson === afterJson + ) { + return; } - } - if ( - JSON.stringify(this.lastLocalCursorStateWithoutDirtyDocuments) === - JSON.stringify(documentsWithCursors) - ) { - return; - } + this.lastLocalCursorStateWithoutDirtyDocumentsJson = afterJson; - this.lastLocalCursorStateWithoutDirtyDocuments = documentsWithCursors; - - this.webSocketManager.updateLocalCursors({ documentsWithCursors }); + this.webSocketManager.updateLocalCursors({ documentsWithCursors }); + }); } public reset(): void { this.knownRemoteCursors = []; - this.lastLocalCursorState = []; - this.lastLocalCursorStateWithoutDirtyDocuments = []; + this.lastLocalCursorStateJson = "[]"; + this.lastLocalCursorStateWithoutDirtyDocumentsJson = "[]"; this.updateLock.reset(); } @@ -223,35 +235,28 @@ export class CursorTracker { private async getDocumentUpToDateness( document: DocumentWithCursors ): Promise { - const record = this.database.getLatestDocumentByRelativePath( - document.relative_path - ); + const record = this.queue.getRecordByLocalPath(document.relativePath); if (!record) { // the document of the cursor must be from the future return DocumentUpToDateness.Later; } - if ( - (record.metadata?.parentVersionId ?? 0) < - (document.vault_update_id ?? 0) - ) { + if (record.parentVersionId < (document.vaultUpdateId ?? 0)) { return DocumentUpToDateness.Later; - } else if ( - (document.vault_update_id ?? 0) < - (record.metadata?.parentVersionId ?? 0) - ) { + } else if ((document.vaultUpdateId ?? 
0) < record.parentVersionId) { // the document of the cursor must be from the past return DocumentUpToDateness.Prior; } const currentContent = await this.fileOperations.read( - document.relative_path + document.relativePath ); - return this.database.getLatestDocumentByRelativePath( - document.relative_path - )?.metadata?.hash === hash(currentContent) + const currentRecord = this.queue.getRecordByLocalPath( + document.relativePath + ); + return currentRecord?.remoteHash === (await hash(currentContent)) ? DocumentUpToDateness.UpToDate : DocumentUpToDateness.Prior; } diff --git a/frontend/sync-client/src/sync-operations/expected-fs-events.ts b/frontend/sync-client/src/sync-operations/expected-fs-events.ts new file mode 100644 index 00000000..a2c4f52f --- /dev/null +++ b/frontend/sync-client/src/sync-operations/expected-fs-events.ts @@ -0,0 +1,138 @@ +import type { RelativePath } from "./types"; + +/** + * Counter-based registry of filesystem events the syncer is about to + * cause. The syncer's own writes/renames/deletes go through + * `FileOperations`, which calls into the host filesystem; the host then + * fires watcher events that come back through `SyncClient.syncLocallyXxx`. + * Without filtering, those echo events would be re-uploaded to the server + * and broadcast back, producing an unbounded loop. + * + * The fix: every fs call in `FileOperations` registers the event it is + * about to provoke; the matching `syncLocallyXxx` handler consumes it. + * User-initiated edits never register, so they pass through unchanged. + * + * Counts are per (kind, path) so back-to-back syncer ops on the same path + * (e.g. apply remote update then re-apply during convergence) match + * one-for-one. If the watcher never fires for a registered op (e.g. the + * fs throws before notifying), the entry is left behind; `clear()` is + * called on pause/destroy to drop those before they collide with a real + * user event later. 
+ */ +export class ExpectedFsEvents { + private readonly creates = new Map(); + private readonly updates = new Map(); + private readonly deletes = new Map(); + // Renames are keyed by `JSON.stringify({oldPath, newPath})` so the + // delimiter cannot occur inside either path. + private readonly renames = new Map(); + + private static renameKey( + oldPath: RelativePath, + newPath: RelativePath + ): string { + return JSON.stringify({ oldPath, newPath }); + } + + public expectCreate(path: RelativePath): void { + this.bump(this.creates, path); + } + + public expectUpdate(path: RelativePath): void { + this.bump(this.updates, path); + } + + public expectDelete(path: RelativePath): void { + this.bump(this.deletes, path); + } + + public expectRename(oldPath: RelativePath, newPath: RelativePath): void { + this.bump(this.renames, ExpectedFsEvents.renameKey(oldPath, newPath)); + } + + /** + * Cancel a previously-registered expectation when the fs op that registered + * it failed before any watcher event could fire. Without this, a leaked + * expectation silently swallows the next genuine user event at the same + * path (or, for renames, the same `oldPath → newPath` pair). + * + * Floored at zero: if the watcher *did* fire (op partially completed) and + * already consumed the entry, the unexpect is a no-op. The fallback is + * acceptable — at worst we re-upload a real edit we'd otherwise filter. 
+ */ + public unexpectCreate(path: RelativePath): void { + this.decrement(this.creates, path); + } + + public unexpectUpdate(path: RelativePath): void { + this.decrement(this.updates, path); + } + + public unexpectDelete(path: RelativePath): void { + this.decrement(this.deletes, path); + } + + public unexpectRename(oldPath: RelativePath, newPath: RelativePath): void { + this.decrement( + this.renames, + ExpectedFsEvents.renameKey(oldPath, newPath) + ); + } + + public matchCreate(path: RelativePath): boolean { + return this.consume(this.creates, path); + } + + public matchUpdate( + path: RelativePath, + oldPath: RelativePath | undefined + ): boolean { + if (oldPath !== undefined) { + return this.consume( + this.renames, + ExpectedFsEvents.renameKey(oldPath, path) + ); + } + return this.consume(this.updates, path); + } + + public matchDelete(path: RelativePath): boolean { + return this.consume(this.deletes, path); + } + + public clear(): void { + this.creates.clear(); + this.updates.clear(); + this.deletes.clear(); + this.renames.clear(); + } + + private bump(map: Map, key: RelativePath): void { + map.set(key, (map.get(key) ?? 0) + 1); + } + + private consume( + map: Map, + key: RelativePath + ): boolean { + const count = map.get(key) ?? 0; + if (count === 0) { + return false; + } + if (count === 1) { + map.delete(key); + } else { + map.set(key, count - 1); + } + return true; + } + + private decrement(map: Map, key: RelativePath): void { + const count = map.get(key) ?? 
0; + if (count <= 1) { + map.delete(key); + } else { + map.set(key, count - 1); + } + } +} diff --git a/frontend/sync-client/src/sync-operations/file-change-notifier.ts b/frontend/sync-client/src/sync-operations/file-change-notifier.ts index d1e49d62..414c9e91 100644 --- a/frontend/sync-client/src/sync-operations/file-change-notifier.ts +++ b/frontend/sync-client/src/sync-operations/file-change-notifier.ts @@ -1,4 +1,4 @@ -import type { RelativePath } from "../persistence/database"; +import type { RelativePath } from "./types"; import { EventListeners } from "../utils/data-structures/event-listeners"; export class FileChangeNotifier { diff --git a/frontend/sync-client/src/sync-operations/offline-change-detector.test.ts b/frontend/sync-client/src/sync-operations/offline-change-detector.test.ts new file mode 100644 index 00000000..cc710e6a --- /dev/null +++ b/frontend/sync-client/src/sync-operations/offline-change-detector.test.ts @@ -0,0 +1,185 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { Logger } from "../tracing/logger"; +import { Settings } from "../persistence/settings"; +import { STORED_STATE_SCHEMA_VERSION, SyncEventQueue } from "./sync-event-queue"; +import { scheduleOfflineChanges } from "./offline-change-detector"; +import type { FileOperations } from "../file-operations/file-operations"; +import type { RelativePath } from "./types"; + +const makeQueue = async (): Promise => { + const logger = new Logger(); + const settings = new Settings(logger, {}, async () => { + /* no-op */ + }); + return new SyncEventQueue( + settings, + logger, + { schemaVersion: STORED_STATE_SCHEMA_VERSION }, + async () => { + /* no-op */ + } + ); +}; + +const makeOperations = ( + files: Record +): FileOperations => { + return { + listFilesRecursively: async () => Object.keys(files), + read: async (path: RelativePath) => { + const data = files[path]; + if (data === undefined) { + throw new Error(`File not found: ${path}`); + } + return 
data; + } + } as unknown as FileOperations; +}; + +describe("scheduleOfflineChanges", () => { + it("does not bind a local file to a placement-pending record whose remoteRelativePath was persisted before the doc moved on the server", async () => { + // The bug: persisted byDocId can carry a placement-pending record + // whose `remoteRelativePath` was saved before the doc was moved + // server-side. After restart, offline-scan running before WS + // catch-up would bind an unrelated local file at that stale path + // to the moved doc and push the user's content as an update — + // silently corrupting the moved doc and stranding the local file. + const queue = await makeQueue(); + + // Stale placement-pending record: server has moved this doc + // away from "stale-X.md" since this snapshot was saved. + await queue.upsertRecord({ + documentId: "MOVED-DOC", + parentVersionId: 5, + remoteRelativePath: "stale-X.md" as RelativePath, + remoteHash: "hash-from-old-state", + localPath: undefined + }); + + // User has an unrelated local file at the stale path. + const operations = makeOperations({ + "stale-X.md": new TextEncoder().encode( + "user's unrelated local content" + ) + }); + + const enqueued: { kind: string; path: string }[] = []; + await scheduleOfflineChanges( + new Logger(), + operations, + queue, + (path) => enqueued.push({ kind: "create", path }), + (args) => enqueued.push({ kind: "update", path: args.relativePath }), + (path) => enqueued.push({ kind: "delete", path }) + ); + + // The local file must become a fresh CREATE — never a hostile + // UPDATE on the moved doc. + assert.deepStrictEqual(enqueued, [ + { kind: "create", path: "stale-X.md" } + ]); + + // The placement-pending record must remain placement-pending — + // its localPath must not have been bound to the unrelated user + // file. The reconciler will place it correctly once WS catch-up + // updates `remoteRelativePath` to the doc's current location. 
+ const record = queue.getDocumentByDocumentId("MOVED-DOC"); + assert.notStrictEqual(record, undefined); + assert.strictEqual(record?.localPath, undefined); + }); + + it("schedules an update for a local file that matches a settled record's localPath", async () => { + const queue = await makeQueue(); + await queue.upsertRecord({ + documentId: "SETTLED-DOC", + parentVersionId: 2, + remoteRelativePath: "doc.md" as RelativePath, + remoteHash: "hash", + localPath: "doc.md" as RelativePath + }); + + const operations = makeOperations({ + "doc.md": new TextEncoder().encode("content") + }); + + const enqueued: { kind: string; path: string }[] = []; + await scheduleOfflineChanges( + new Logger(), + operations, + queue, + (path) => enqueued.push({ kind: "create", path }), + (args) => enqueued.push({ kind: "update", path: args.relativePath }), + (path) => enqueued.push({ kind: "delete", path }) + ); + + assert.deepStrictEqual(enqueued, [ + { kind: "update", path: "doc.md" } + ]); + }); + + it("schedules a delete for a settled record whose local file is missing", async () => { + const queue = await makeQueue(); + await queue.upsertRecord({ + documentId: "VANISHED-DOC", + parentVersionId: 4, + remoteRelativePath: "gone.md" as RelativePath, + remoteHash: "hash", + localPath: "gone.md" as RelativePath + }); + + const operations = makeOperations({}); + + const enqueued: { kind: string; path: string }[] = []; + await scheduleOfflineChanges( + new Logger(), + operations, + queue, + (path) => enqueued.push({ kind: "create", path }), + (args) => enqueued.push({ kind: "update", path: args.relativePath }), + (path) => enqueued.push({ kind: "delete", path }) + ); + + assert.deepStrictEqual(enqueued, [ + { kind: "delete", path: "gone.md" } + ]); + }); + + it("detects an offline rename when an untracked file matches a deleted record's content hash", async () => { + const queue = await makeQueue(); + const content = new TextEncoder().encode("body"); + const contentHash = await (await 
import("../utils/hash")).hash(content); + + await queue.upsertRecord({ + documentId: "DOC-1", + parentVersionId: 5, + remoteRelativePath: "old.md" as RelativePath, + remoteHash: contentHash, + localPath: "old.md" as RelativePath + }); + const operations = makeOperations({ "new.md": content }); + + const enqueued: { + kind: string; + path: string; + oldPath?: string; + }[] = []; + await scheduleOfflineChanges( + new Logger(), + operations, + queue, + (path) => enqueued.push({ kind: "create", path }), + (args) => + enqueued.push({ + kind: "update", + path: args.relativePath, + oldPath: args.oldPath + }), + (path) => enqueued.push({ kind: "delete", path }) + ); + + assert.deepStrictEqual(enqueued, [ + { kind: "update", path: "new.md", oldPath: "old.md" } + ]); + }); +}); diff --git a/frontend/sync-client/src/sync-operations/offline-change-detector.ts b/frontend/sync-client/src/sync-operations/offline-change-detector.ts new file mode 100644 index 00000000..5b91e782 --- /dev/null +++ b/frontend/sync-client/src/sync-operations/offline-change-detector.ts @@ -0,0 +1,188 @@ +import type { DocumentRecord, RelativePath } from "./types"; +import type { Logger } from "../tracing/logger"; +import { hash } from "../utils/hash"; +import type { FileOperations } from "../file-operations/file-operations"; +import { findMatchingFile } from "../utils/find-matching-file"; +import type { SyncEventQueue } from "./sync-event-queue"; +import { removeFromArray } from "../utils/remove-from-array"; +import { FileNotFoundError } from "../errors/file-not-found-error"; + +/** + * Scans the local filesystem and the document database to determine + * which files were created, updated, moved, or deleted while the + * client was offline, then enqueues the appropriate sync events. + * + * Placement-pending records (`localPath === undefined`) are deliberately + * NOT bound to local files at the same `remoteRelativePath` here. 
The + * persisted byDocId snapshot can be stale — a doc's server-side path + * may have changed since the last save, so binding by stored path would + * fold an unrelated user file into a moved doc and silently corrupt it. + * Local files at those paths fall through to the LocalCreate flow below; + * the server's create_document handler dedupes by path+freshness when + * the doc really is at that path, and otherwise creates a new doc that + * the reconciler places correctly once catch-up updates the stale + * record's `remoteRelativePath`. + */ +export async function scheduleOfflineChanges( + logger: Logger, + operations: FileOperations, + queue: SyncEventQueue, + enqueueCreate: (path: RelativePath) => void, + enqueueUpdate: (args: { + oldPath?: RelativePath; + relativePath: RelativePath; + }) => void, + enqueueDelete: (path: RelativePath) => void +): Promise { + const allLocalFiles = new Set(await operations.listFilesRecursively()); + logger.info(`Scheduling sync for ${allLocalFiles.size} local files`); + // `allSettledDocuments()` skips records with `localPath === undefined` + // — those have no local file by definition and don't participate in + // the disk-vs-record diff. The reconciler will place them on its + // next pass. + const allDocuments = queue.allSettledDocuments(); + + // A doc is "possibly deleted" only if it has no local file. Including + // docs that still exist locally would queue a spurious delete alongside + // the update below. + const locallyPossiblyDeletedFiles: DocumentRecord[] = []; + for (const record of allDocuments.values()) { + // `localPath` is guaranteed non-undefined for entries in + // `allSettledDocuments()`, but narrow explicitly for the type + // checker (and so a future change to that helper doesn't + // silently break this loop). 
+ if ( + record.localPath !== undefined && + !allLocalFiles.has(record.localPath) + ) { + locallyPossiblyDeletedFiles.push(record); + } + } + + const locallyPossibleCreatedFiles: RelativePath[] = []; + const syncedLocalFiles: RelativePath[] = []; + + for (const localFile of allLocalFiles) { + if (allDocuments.has(localFile)) { + syncedLocalFiles.push(localFile); + } else if (queue.hasPendingCreateForPath(localFile)) { + // A LocalCreate for this path is still in flight (no + // record yet — its docId is a Promise). Re-enqueueing + // would fire a second HTTP create that the server then + // deconflicts to a sibling path, leaving the same bytes + // in two docs. Skip; the in-flight create owns this slot. + continue; + } else { + locallyPossibleCreatedFiles.push(localFile); + } + } + + const renamedPaths = new Set(); + // Track paths that were in `allLocalFiles` at scan-start but have + // since disappeared. The scan awaits between `listFilesRecursively` + // and each `read`, so a concurrent delete (slow file events, real + // user activity) can vacate a slot mid-scan. Throwing would abort + // the whole scan; nothing to sync for a file that's already gone. + const disappearedPaths = new Set(); + for (const path of locallyPossibleCreatedFiles) { + let content: Uint8Array; + try { + content = await operations.read(path); + } catch (e) { + if (e instanceof FileNotFoundError) { + logger.debug( + `File ${path} disappeared before offline-scan could read it; skipping` + ); + disappearedPaths.add(path); + continue; + } + throw e; + } + const contentHash = await hash(content); + + const matchingDeletedFile = await findMatchingFile( + contentHash, + locallyPossiblyDeletedFiles + ); + if (matchingDeletedFile !== undefined) { + // localPath is guaranteed defined for records in + // locallyPossiblyDeletedFiles (we filtered above). 
+ const oldPath = matchingDeletedFile.localPath; + if (oldPath === undefined) { + continue; + } + logger.debug( + `File ${path} might have been moved from ${oldPath} while offline, scheduling sync to move it` + ); + enqueueUpdate({ + oldPath, + relativePath: path + }); + removeFromArray(locallyPossiblyDeletedFiles, matchingDeletedFile); + renamedPaths.add(path); + } + } + + for (const path of locallyPossibleCreatedFiles) { + if (renamedPaths.has(path) || disappearedPaths.has(path)) { + continue; + } + + logger.info( + `File ${path} was created while offline, scheduling sync to create it` + ); + + enqueueCreate(path); + } + + for (const item of locallyPossiblyDeletedFiles) { + if (item.localPath === undefined) { + continue; + } + logger.info( + `File ${item.localPath} was deleted while offline, scheduling sync to delete it` + ); + enqueueDelete(item.localPath); + } + + for (const path of syncedLocalFiles) { + const record = allDocuments.get(path); + if ( + record !== undefined && + record.localPath !== undefined && + record.localPath !== record.remoteRelativePath && + !allLocalFiles.has(record.remoteRelativePath) && + queue.byLocalPath.get(record.remoteRelativePath) === undefined + ) { + // Lost local-rename recovery. The record's `localPath` + // (where the user has the file now) and + // `remoteRelativePath` (where the server still thinks it + // lives) disagree, which means a queued user-rename's + // LocalUpdate never reached the server before the queue + // was wiped (typically a sync reset). Without this + // branch the next `enqueueUpdate({ relativePath: path })` + // is a content-only update — server keeps the doc at the + // old path, the user's file at the new path orphans, and + // other clients never see the rename. Replay the rename + // by restoring the OLD localPath so the queue's enqueue + // can find the record by `oldPath`, then enqueueUpdate + // moves it back to the new path with `isUserRename`. 
+ // Only fires when the old slot is genuinely empty + // (neither on disk nor claimed by another tracked + // record) — otherwise the rename target is occupied and + // we'd be confusing the byLocalPath index. + const oldPath = record.remoteRelativePath; + const newPath = record.localPath; + logger.info( + `Lost local rename detected: doc ${record.documentId} at ${oldPath} (server) vs ${newPath} (local); replaying rename to server` + ); + await queue.setLocalPath(record.documentId, oldPath); + enqueueUpdate({ oldPath, relativePath: newPath }); + continue; + } + logger.info( + `File ${path} may have been updated while offline, scheduling sync to update it` + ); + enqueueUpdate({ relativePath: path }); + } +} diff --git a/frontend/sync-client/src/sync-operations/reconciler.test.ts b/frontend/sync-client/src/sync-operations/reconciler.test.ts new file mode 100644 index 00000000..13a08363 --- /dev/null +++ b/frontend/sync-client/src/sync-operations/reconciler.test.ts @@ -0,0 +1,69 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { Logger, LogLevel } from "../tracing/logger"; +import { Settings } from "../persistence/settings"; +import { STORED_STATE_SCHEMA_VERSION, SyncEventQueue } from "./sync-event-queue"; +import { Reconciler } from "./reconciler"; +import { SyncResetError } from "../errors/sync-reset-error"; +import type { FileOperations } from "../file-operations/file-operations"; +import type { SyncService } from "../services/sync-service"; +import type { RelativePath } from "./types"; + +describe("Reconciler", () => { + it("does not emit an error when placement fetch is interrupted by reset", async () => { + const logger = new Logger(); + const settings = new Settings(logger, {}, async () => { + /* no-op */ + }); + const queue = new SyncEventQueue( + settings, + logger, + { schemaVersion: STORED_STATE_SCHEMA_VERSION }, + async () => { + /* no-op */ + } + ); + + await queue.upsertRecord({ + documentId: "DOC-1", + 
parentVersionId: 1, + remoteHash: "hash", + remoteRelativePath: "remote.md" as RelativePath, + localPath: undefined + }); + + const operations = { + exists: async () => false, + create: async () => { + assert.fail("reset-interrupted placement should not write"); + } + } as unknown as FileOperations; + + const syncService = { + getDocumentVersionContent: async () => { + throw new SyncResetError(); + } + } as unknown as SyncService; + + const reconciler = new Reconciler( + logger, + operations, + syncService, + queue, + new Map() + ); + + await reconciler.run(); + + assert.deepStrictEqual(logger.getMessages(LogLevel.ERROR), []); + assert.ok( + logger + .getMessages(LogLevel.INFO) + .some((line) => + line.message.includes( + "content fetch for DOC-1 interrupted by sync reset" + ) + ) + ); + }); +}); diff --git a/frontend/sync-client/src/sync-operations/reconciler.ts b/frontend/sync-client/src/sync-operations/reconciler.ts new file mode 100644 index 00000000..93505a3c --- /dev/null +++ b/frontend/sync-client/src/sync-operations/reconciler.ts @@ -0,0 +1,1020 @@ +import type { FileOperations } from "../file-operations/file-operations"; +import { FileNotFoundError } from "../errors/file-not-found-error"; +import { FileAlreadyExistsError } from "../errors/file-already-exists-error"; +import type { Logger } from "../tracing/logger"; +import type { SyncService } from "../services/sync-service"; +import type { SyncEventQueue } from "./sync-event-queue"; +import type { DocumentId, DocumentRecord, RelativePath } from "./types"; +import { hash } from "../utils/hash"; +import { SyncResetError } from "../errors/sync-reset-error"; + +const SWAP_MARKER_DIR = ".vaultlink"; +const SWAP_MARKER_PREFIX = "swap-"; +const SWAP_MARKER_SUFFIX = ".json"; + +interface SwapLeg { + documentId: DocumentId; + from: RelativePath; + to: RelativePath; + expectedHashOnFrom: string; +} + +interface SwapMarker { + uuid: string; + legs: SwapLeg[]; +} + +interface PlannedMove { + record: DocumentRecord; + 
from: RelativePath; + to: RelativePath; +} + +function tryParseSwapMarker(bytes: Uint8Array): SwapMarker | undefined { + try { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion + return JSON.parse(new TextDecoder().decode(bytes)) as SwapMarker; + } catch { + return undefined; + } +} + +/** + * The Reconciler is the second of the sync engine's two loops. The wire + * loop (records ↔ server) updates `record.remoteRelativePath` and writes + * file content into `record.localPath`; it does not move files for path + * placement. The Reconciler (records ↔ disk) runs after every wire-loop + * step and best-effort lines disk up with `remoteRelativePath` for every + * tracked record. + * + * "Best effort" means: any per-record obstacle (slot occupied, file + * missing, etc.) is silently skipped and retried on the next pass. + * `run()` never throws — per-record errors are logged and the next + * record is processed. + * + * Three shapes of work exist: + * 1. Initial placement — `localPath === undefined`. The wire loop + * created the record with no on-disk presence (e.g. a remote create + * whose target slot was occupied at receive time). If the slot is + * free now, fetch content (from `pendingPlacementContent` if a + * handler stuffed it for us, otherwise from the server) and write. + * 2. Simple rename — `localPath !== remoteRelativePath` and no other + * tracked record wants our current slot. Plain rename. + * 3. Cycle — two or more records want each others' current slots + * (A → B, B → A; or longer rotations). Resolved by reading every + * member's bytes into memory then overwriting each target slot. + * A write-ahead marker file lets `recoverFromInterruptedSwap()` + * finish a swap that crashed mid-flight on next startup. 
+ */ +export class Reconciler { + public constructor( + private readonly logger: Logger, + private readonly operations: FileOperations, + private readonly syncService: SyncService, + private readonly queue: SyncEventQueue, + // Bytes already in hand from a recent server response, keyed by + // docId. Wire-loop handlers populate this transiently when they + // have content for a record they just upserted with `localPath + // === undefined`; the reconciler uses it on the same pass + // instead of re-fetching from the server. Keys are deleted when + // consumed. + private readonly pendingPlacementContent: Map + ) {} + + /** + * Single best-effort pass. Walks every tracked record, places + * unplaced ones, and reorganises any whose `localPath !== + * remoteRelativePath`. Never throws — per-record failures are + * logged and the next record is processed. The Syncer is expected + * to call this after every wire-loop drain step, so any record + * skipped this pass gets another shot once the obstructing event + * is processed. + */ + public async run(): Promise { + const allRecords = this.collectAllRecords(); + + const movesNeeded: PlannedMove[] = []; + const deferredPlacements: DocumentRecord[] = []; + + for (const record of allRecords) { + if (record.localPath === record.remoteRelativePath) { + continue; + } + + // The reconciler operates on settled records. A record with a + // pending LocalUpdate or LocalDelete is mid-flight: the wire + // loop owns the user's intent (rename target, edit content, + // deletion) and the record's `remoteRelativePath` may still + // reflect the pre-rename server state. Touching disk now + // would race the wire loop — e.g. a queued user-rename + // LocalUpdate would find its source path vacated by the + // reconciler moving the file back to the stale + // `remoteRelativePath`. Skip; once the wire loop drains the + // pending events, a subsequent reconciler pass sees a + // settled record and converges. 
+ if ( + this.queue.hasPendingLocalEventsForDocumentId(record.documentId) + ) { + continue; + } + + // The doc has been deleted server-side (HTTP DELETE acked) but + // the WebSocket receipt that would `removeDocumentById` hasn't + // arrived yet. The record looks like "needs initial placement" + // (`localPath === undefined`, since the LocalDelete enqueue + // cleared it), but placing would resurrect a doc the user + // explicitly deleted. Skip; `processRemoteDelete` will remove + // the record entirely once the WS receipt arrives. + if (this.queue.hasPendingServerDelete(record.documentId)) { + continue; + } + + if (record.localPath === undefined) { + deferredPlacements.push(record); + continue; + } + + // localPath !== undefined and !== remoteRelativePath. Plan a + // move. First defensive existence check: the file may have + // been deleted between the wire loop touching disk and this + // reconciler pass — the watcher's LocalDelete will land + // shortly and fix the record. Skip silently. + try { + if (!(await this.operations.exists(record.localPath))) { + this.logger.debug( + `Reconciler: record ${record.documentId} localPath ${record.localPath} ` + + `is missing on disk; skipping (LocalDelete will catch up)` + ); + continue; + } + } catch (e) { + this.logger.error( + `Reconciler: existence check failed for ${record.localPath}: ${String(e)}` + ); + continue; + } + + movesNeeded.push({ + record, + from: record.localPath, + to: record.remoteRelativePath + }); + } + + if (movesNeeded.length > 0) { + await this.executeMoves(movesNeeded); + } + + // Run placements *after* moves so a placement whose target slot + // was occupied by a tracked record at the start of the pass can + // still succeed once that record's move frees the slot. Without + // this ordering, a placement-pending record stalls until the + // next reconciler tick — which only fires when new events + // arrive, leaving the doc absent on disk if the queue happens + // to be quiescent at that moment. 
+ for (const record of deferredPlacements) { + // Re-check the gating conditions: a pending event may have + // been enqueued for this doc while we were processing + // moves above, and an interleaved placement would race + // it. + if ( + this.queue.hasPendingLocalEventsForDocumentId(record.documentId) + ) { + continue; + } + if (this.queue.hasPendingServerDelete(record.documentId)) { + continue; + } + if (record.localPath !== undefined) { + continue; + } + await this.tryInitialPlacement(record); + } + } + + /** + * Read any swap-marker file left behind by a crash mid-swap and + * roll forward. Called once on startup before the Reconciler + * begins normal passes. Idempotent: with no marker, a no-op. + */ + public async recoverFromInterruptedSwap(): Promise { + let markerPaths: RelativePath[] = []; + try { + markerPaths = await this.findSwapMarkerFiles(); + } catch (e) { + this.logger.error( + `Reconciler: failed to scan for swap markers: ${String(e)}` + ); + return; + } + + for (const markerPath of markerPaths) { + try { + await this.recoverFromOneMarker(markerPath); + } catch (e) { + this.logger.error( + `Reconciler: recovery from ${markerPath} failed: ${String(e)}` + ); + } + } + } + + private collectAllRecords(): DocumentRecord[] { + // Iterate every tracked record — placement-pending ones + // (`localPath === undefined`) included. `allSettledDocuments` + // filters those out, which would render records born from a + // remote create that landed on an occupied slot (no on-disk + // file, no entry in `pendingPlacementContent` either, since the + // wire loop deliberately doesn't buffer their content) invisible + // forever. `pendingPlacementContent` is purely a cache for + // `tryInitialPlacement`'s content fetch — not a record-discovery + // channel. + const out: DocumentRecord[] = []; + for (const record of this.queue.allRecords()) { + out.push(record); + } + + // Best-effort cleanup: drop cached content for docs the queue + // no longer tracks. 
Previously this happened as a side effect of + // the placement-pending discovery loop; do it explicitly now. + if (this.pendingPlacementContent.size > 0) { + for (const docId of this.pendingPlacementContent.keys()) { + if (this.queue.getDocumentByDocumentId(docId) === undefined) { + this.pendingPlacementContent.delete(docId); + } + } + } + + return out; + } + + private async tryInitialPlacement(record: DocumentRecord): Promise { + const target = record.remoteRelativePath; + + if (this.queue.hasPendingCreateForPath(target)) { + this.logger.debug( + `Reconciler: cannot place ${record.documentId} at ${target} ` + + `— pending local create still claims that path; will retry next pass` + ); + return; + } + + // Slot occupancy: pre-check both the disk and our tracked + // records. Either form of occupancy means we wait — the + // occupant's own reconciliation pass (after their next wire-loop + // step) will move them off this slot. + try { + if (await this.operations.exists(target)) { + this.logger.debug( + `Reconciler: cannot place ${record.documentId} at ${target} ` + + `— slot occupied on disk; will retry next pass` + ); + return; + } + } catch (e) { + this.logger.error( + `Reconciler: existence check failed for ${target}: ${String(e)}` + ); + return; + } + if (this.queue.byLocalPath.get(target) !== undefined) { + this.logger.debug( + `Reconciler: cannot place ${record.documentId} at ${target} ` + + `— slot tracked by another record; will retry next pass` + ); + return; + } + + let content = this.pendingPlacementContent.get(record.documentId); + if (content === undefined) { + try { + content = await this.syncService.getDocumentVersionContent({ + documentId: record.documentId, + vaultUpdateId: record.parentVersionId + }); + } catch (e) { + if (e instanceof SyncResetError) { + this.logger.info( + `Reconciler: content fetch for ${record.documentId} interrupted by sync reset` + ); + return; + } + this.logger.error( + `Reconciler: failed to fetch content for 
${record.documentId}: ${String(e)}` + ); + return; + } + } + + try { + await this.operations.create(target, content); + } catch (e) { + if (e instanceof FileNotFoundError) { + this.logger.debug( + `Reconciler: create at ${target} hit FileNotFound (likely parent ` + + `directory race); will retry next pass` + ); + return; + } + if (e instanceof FileAlreadyExistsError) { + this.logger.debug( + `Reconciler: create at ${target} lost TOCTOU race ` + + `(slot occupied between pre-check and write); will retry next pass` + ); + return; + } + this.logger.error( + `Reconciler: create at ${target} failed: ${String(e)}` + ); + return; + } + + try { + await this.queue.setLocalPath(record.documentId, target); + } catch (e) { + this.logger.error( + `Reconciler: setLocalPath after create failed for ${record.documentId}: ${String(e)}` + ); + return; + } + this.pendingPlacementContent.delete(record.documentId); + this.logger.debug( + `Reconciler: placed ${record.documentId} at ${target}` + ); + } + + private async executeMoves(moves: PlannedMove[]): Promise { + // Build a directed graph: each move (record currently at `from`, + // wants to go to `to`) gets an edge to whatever tracked record + // currently holds `to`. A node with no outgoing edge is a leaf + // in the DAG: its target slot is held by no tracked record. If + // the slot is held by an *untracked* file we can't safely + // displace it (no record to relocate); skip those moves and + // let the next pass retry. 
+ const movesByDocId = new Map(); + for (const move of moves) { + movesByDocId.set(move.record.documentId, move); + } + + const skipped = new Set(); + const edges = new Map(); + + for (const move of moves) { + const occupant = this.queue.byLocalPath.get(move.to); + if (occupant === undefined) { + let occupied = false; + try { + occupied = await this.operations.exists(move.to); + } catch (e) { + this.logger.error( + `Reconciler: existence check failed for ${move.to}: ${String(e)}` + ); + skipped.add(move.record.documentId); + continue; + } + if (occupied) { + this.logger.debug( + `Reconciler: move ${move.record.documentId} -> ${move.to} blocked ` + + `by untracked file; will retry next pass` + ); + skipped.add(move.record.documentId); + continue; + } + edges.set(move.record.documentId, null); + } else if (occupant.documentId === move.record.documentId) { + // Self-loop on `to` shouldn't normally happen — we + // skipped records where localPath===remoteRelativePath + // up front. Defensive: nothing to do. + continue; + } else if (movesByDocId.has(occupant.documentId)) { + edges.set(move.record.documentId, occupant.documentId); + } else { + // Occupant is a tracked record that doesn't *want* to + // move (its localPath === its remoteRelativePath). We + // can't dislodge it without orphaning its on-disk + // file; skip and retry. + this.logger.debug( + `Reconciler: move ${move.record.documentId} -> ${move.to} blocked by ` + + `tracked record ${occupant.documentId} which is not moving; ` + + `will retry next pass` + ); + skipped.add(move.record.documentId); + } + } + + // SCC decomposition (Tarjan's algorithm) over the move graph. + const sccs = this.tarjanSccs(edges, skipped); + + // Topo-sort the DAG of SCCs (leaves first). Tarjan emits SCCs + // in reverse topological order — leaves first — which is + // already what we want. 
+ for (const scc of sccs) { + if (scc.length === 1) { + const [docId] = scc; + if (skipped.has(docId)) { + continue; + } + const move = movesByDocId.get(docId); + if (move === undefined) { + continue; + } + // Self-loop check: if the only edge from this node + // points back to itself, treat as a 1-cycle (impossible + // given our up-front filter, but cheap defensiveness). + const target = edges.get(docId); + if (target === docId) { + await this.executeCycle([move]); + } else { + await this.executeSimpleRename(move); + } + } else { + const cycleMoves = scc + .map((id) => movesByDocId.get(id)) + .filter( + (m): m is PlannedMove => + m !== undefined && !skipped.has(m.record.documentId) + ); + if (cycleMoves.length === scc.length) { + await this.executeCycle(cycleMoves); + } else { + // A member of the cycle was skipped — the cycle + // can't be resolved as a unit. Skip the rest; next + // pass tries again with whatever's still relevant. + this.logger.debug( + `Reconciler: cycle of ${scc.length} skipped because a ` + + `member dropped out; will retry next pass` + ); + } + } + } + } + + private async executeSimpleRename(move: PlannedMove): Promise { + // Defense-in-depth: the queue's invariant says + // `record.localPath !== undefined ⇒ byLocalPath.get(record.localPath) === record`. + // If the byLocalPath index disagrees with the record we + // captured when planning, the invariant was violated somewhere + // upstream — the file at `move.from` belongs to a different + // record now and renaming it would clobber that record's + // content. Refuse the move; the next pass re-plans. + const indexed = this.queue.byLocalPath.get(move.from); + if (indexed !== move.record) { + this.logger.warn( + `Reconciler: refusing rename ${move.from} -> ${move.to} for ` + + `${move.record.documentId}: byLocalPath says ${move.from} ` + + `belongs to ${indexed?.documentId ?? 
""} ` + + `(invariant violation upstream); skipping` + ); + return; + } + // The target may have been freed by an earlier move in this + // pass (a leaf we processed first). Re-check both source and + // target before committing. + try { + if (!(await this.operations.exists(move.from))) { + this.logger.debug( + `Reconciler: source ${move.from} vanished before rename; skipping` + ); + return; + } + } catch (e) { + this.logger.error( + `Reconciler: existence check failed for ${move.from}: ${String(e)}` + ); + return; + } + try { + if (await this.operations.exists(move.to)) { + if (this.queue.byLocalPath.get(move.to) !== undefined) { + // Slot got reclaimed by a tracked doc mid-pass — + // back off and retry next pass. + this.logger.debug( + `Reconciler: target ${move.to} reclaimed by another record ` + + `mid-pass; skipping` + ); + return; + } + // Untracked file appeared; same reasoning as in + // executeMoves' planning step. Defer. + this.logger.debug( + `Reconciler: target ${move.to} now occupied by untracked file; skipping` + ); + return; + } + } catch (e) { + this.logger.error( + `Reconciler: existence check failed for ${move.to}: ${String(e)}` + ); + return; + } + + try { + await this.operations.move(move.from, move.to); + } catch (e) { + if (e instanceof FileNotFoundError) { + this.logger.debug( + `Reconciler: rename ${move.from} -> ${move.to} hit FileNotFound; ` + + `will retry next pass` + ); + return; + } + if (e instanceof FileAlreadyExistsError) { + this.logger.debug( + `Reconciler: rename ${move.from} -> ${move.to} lost TOCTOU race ` + + `(target reclaimed between pre-check and rename); will retry next pass` + ); + return; + } + this.logger.error( + `Reconciler: rename ${move.from} -> ${move.to} failed: ${String(e)}` + ); + return; + } + + try { + await this.queue.setLocalPath(move.record.documentId, move.to); + } catch (e) { + this.logger.error( + `Reconciler: setLocalPath after rename failed for ${move.record.documentId}: ${String(e)}` + ); + return; + 
} + this.logger.debug( + `Reconciler: renamed ${move.record.documentId} from ${move.from} to ${move.to}` + ); + } + + private async executeCycle(members: PlannedMove[]): Promise { + // Defense-in-depth: same invariant check as + // `executeSimpleRename` but cycle-wide. If any member's `from` + // slot no longer matches the planned record per byLocalPath, + // abort the whole cycle — partial-cycle progress under a + // shadowed-record race is the worst case (it can shuffle bytes + // between the wrong docs). + for (const member of members) { + const indexed = this.queue.byLocalPath.get(member.from); + if (indexed !== member.record) { + this.logger.warn( + `Reconciler: refusing cycle: byLocalPath says ${member.from} ` + + `belongs to ${indexed?.documentId ?? ""} ` + + `but planned for ${member.record.documentId} ` + + `(invariant violation upstream); skipping cycle` + ); + return; + } + } + // Read every member's bytes first; we'll overwrite the target + // slots with these. All reads happen before any write, so the + // cycle is fully captured in memory before we start mutating + // disk. If any read fails the whole cycle is aborted — + // partial-cycle work is the riskiest case (it can leave docs + // pointing at the wrong content). + const contentByDocId = new Map(); + // We also need the pre-write content of each `to` slot for the + // 3-way merge in `operations.write` — passing the freshly-read + // disk bytes as `expectedContent` makes the merge resolve to a + // clean overwrite (since `expected === current` at write time). + const oldToContentByDocId = new Map(); + try { + for (const member of members) { + contentByDocId.set( + member.record.documentId, + await this.operations.read(member.from) + ); + } + // The `to` of each member is guaranteed to be the `from` of + // some other member (it's a cycle). We've already read all + // those `from`s, so reuse those reads. 
+ const fromToDocId = new Map(); + for (const member of members) { + fromToDocId.set(member.from, member.record.documentId); + } + for (const member of members) { + const sourceDocId = fromToDocId.get(member.to); + if (sourceDocId === undefined) { + throw new Error( + `Reconciler: cycle ${member.record.documentId} -> ${member.to} ` + + `has no member at ${member.to}; graph is not a true cycle` + ); + } + const oldBytes = contentByDocId.get(sourceDocId); + if (oldBytes === undefined) { + throw new Error( + `Reconciler: missing pre-read content for ${sourceDocId}` + ); + } + oldToContentByDocId.set(member.record.documentId, oldBytes); + } + } catch (e) { + this.logger.error( + `Reconciler: cycle pre-read failed: ${String(e)}; aborting cycle` + ); + return; + } + + // Write-ahead marker so a crash mid-swap can be repaired on + // next start. Recovery decides what's been written by hashing + // each `from` slot — anything still matching `expectedHashOnFrom` + // hasn't been overwritten yet. + const legs: SwapLeg[] = []; + try { + for (const member of members) { + const memberContent = contentByDocId.get( + member.record.documentId + ); + if (memberContent === undefined) { + throw new Error( + `Reconciler: cycle member ${member.record.documentId} missing content` + ); + } + legs.push({ + documentId: member.record.documentId, + from: member.from, + to: member.to, + expectedHashOnFrom: await hash(memberContent) + }); + } + } catch (e) { + this.logger.error( + `Reconciler: cycle hashing failed: ${String(e)}; aborting cycle` + ); + return; + } + + const markerUuid = crypto.randomUUID(); + const markerPath = this.markerPathFor(markerUuid); + const markerBytes = new TextEncoder().encode( + JSON.stringify({ uuid: markerUuid, legs } satisfies SwapMarker) + ); + try { + // The marker path embeds a fresh uuid, so a FileAlreadyExistsError + // is statistically impossible here. 
+ await this.operations.create(markerPath, markerBytes); + } catch (e) { + this.logger.error( + `Reconciler: failed to write swap marker ${markerPath}: ${String(e)}; ` + + `aborting cycle` + ); + return; + } + + // Now apply the writes. Each leg overwrites the bytes at `to` + // with the bytes that were at the cycle predecessor's `from`. + // We pass the freshly-read pre-write content as + // `expectedContent` so the 3-way merge inside `operations.write` + // becomes a clean overwrite (no concurrent edits to merge with). + // `operations.write` registers `expectUpdate` itself, so the + // watcher swallows each leg's modify event. + const writtenLegs: SwapLeg[] = []; + for (const leg of legs) { + const newBytes = contentByDocId.get(leg.documentId); + const oldBytes = oldToContentByDocId.get(leg.documentId); + if (newBytes === undefined || oldBytes === undefined) { + this.logger.error( + `Reconciler: cycle leg ${leg.from} -> ${leg.to} missing ` + + `content; aborting cycle` + ); + return; + } + try { + await this.operations.write(leg.to, oldBytes, newBytes); + writtenLegs.push(leg); + } catch (e) { + this.logger.error( + `Reconciler: cycle leg ${leg.from} -> ${leg.to} write failed: ` + + `${String(e)}; cycle is now in a half-applied state — recovery ` + + `marker ${markerPath} will roll forward on next start` + ); + // Don't delete the marker — it's load-bearing for + // recovery. The records' localPath assignments are + // intentionally NOT updated for the failed leg or any + // subsequent leg, so the next reconciler pass will + // observe the same situation and re-plan. + return; + } + } + + // Re-key records to their new localPaths. We do this AFTER + // all writes succeeded; if a setLocalPath fails partway the + // marker is still on disk and recovery covers it. 
+ for (const leg of writtenLegs) { + try { + await this.queue.setLocalPath(leg.documentId, leg.to); + } catch (e) { + this.logger.error( + `Reconciler: setLocalPath after cycle write failed for ` + + `${leg.documentId}: ${String(e)}` + ); + } + } + + try { + await this.operations.delete(markerPath); + } catch (e) { + this.logger.warn( + `Reconciler: failed to delete swap marker ${markerPath}: ${String(e)}; ` + + `next start's recovery will see it but find every leg already applied` + ); + } + this.logger.debug( + `Reconciler: completed cycle of ${members.length} members` + ); + } + + private async findSwapMarkerFiles(): Promise { + let entries: RelativePath[] = []; + try { + entries = + await this.operations.listFilesRecursively(SWAP_MARKER_DIR); + } catch (e) { + if (e instanceof FileNotFoundError) { + return []; + } + throw e; + } + return entries.filter((p) => { + const name = p.split("/").pop() ?? ""; + return ( + name.startsWith(SWAP_MARKER_PREFIX) && + name.endsWith(SWAP_MARKER_SUFFIX) + ); + }); + } + + private async recoverFromOneMarker( + markerPath: RelativePath + ): Promise { + const markerBytes = await this.operations.read(markerPath); + const marker = this.parseSwapMarker(markerBytes); + if (marker === undefined) { + this.logger.error( + `Reconciler: corrupt swap marker ${markerPath}; deleting` + ); + try { + await this.operations.delete(markerPath); + } catch (deleteErr) { + this.logger.error( + `Reconciler: failed to delete corrupt marker ${markerPath}: ${String(deleteErr)}` + ); + } + return; + } + + this.logger.info( + `Reconciler: recovering from interrupted swap ${marker.uuid} ` + + `with ${marker.legs.length} legs` + ); + + // Recovery rules per leg: + // - hash(from) === expectedHashOnFrom — the swap was + // interrupted BEFORE this leg overwrote `to`. We need to + // write the source bytes to `to` AND update the record. 
+ // - hash(from) differs (or `from` is missing) — this leg + // already ran (someone else's bytes are now at `from`, + // which means the cycle predecessor's leg ran too). Mark + // as already-applied for record bookkeeping. + for (const leg of marker.legs) { + let needsApply = false; + try { + if (await this.operations.exists(leg.from)) { + const fromBytes = await this.operations.read(leg.from); + const fromHash = await hash(fromBytes); + needsApply = fromHash === leg.expectedHashOnFrom; + } + } catch (e) { + this.logger.error( + `Reconciler: hash check during recovery for ${leg.from} failed: ` + + `${String(e)}; skipping leg` + ); + continue; + } + + if (needsApply) { + try { + const sourceBytes = await this.operations.read(leg.from); + // We don't know what (if anything) is at `to`. If + // it exists we want to overwrite. operations.write + // refuses if the file doesn't exist, so: + if (await this.operations.exists(leg.to)) { + const currentToBytes = await this.operations.read( + leg.to + ); + await this.operations.write( + leg.to, + currentToBytes, + sourceBytes + ); + } else { + await this.operations.create(leg.to, sourceBytes); + } + } catch (e) { + this.logger.error( + `Reconciler: applying recovery leg ${leg.from} -> ${leg.to} ` + + `failed: ${String(e)}` + ); + continue; + } + } + + // Whether we just applied or it was already applied, + // update the record so its localPath matches the + // post-swap state. 
+ try { + const record = this.queue.getDocumentByDocumentId( + leg.documentId + ); + if (record !== undefined) { + await this.queue.setLocalPath(leg.documentId, leg.to); + } + } catch (e) { + this.logger.error( + `Reconciler: setLocalPath during recovery for ${leg.documentId} ` + + `failed: ${String(e)}` + ); + } + } + + try { + await this.operations.delete(markerPath); + } catch (e) { + this.logger.error( + `Reconciler: failed to delete swap marker ${markerPath} after recovery: ` + + String(e) + ); + } + } + + private markerPathFor(uuid: string): RelativePath { + return `${SWAP_MARKER_DIR}/${SWAP_MARKER_PREFIX}${uuid}${SWAP_MARKER_SUFFIX}`; + } + + /** + * SCC decomposition over the move graph, returning components in + * leaves-first order (so the caller can process leaves before + * cycles, freeing target slots progressively). + * + * Exploits the fact that this is a *functional graph*: each node + * has at most one outgoing edge (the doc whose slot we want). So + * every non-trivial SCC is a single simple cycle; any non-cycle + * node is its own singleton component. To detect cycles, walk + * from each unvisited node following edges and mark the path; if + * we hit a node on the current path, the segment from that node + * to the current frontier is a cycle. If we hit a visited node + * not on the current path (or a null), we just chain leaves. + * + * Skipped nodes are treated as having no outgoing edge (their + * targets are blocked). + */ + private tarjanSccs( + edges: Map, + skipped: Set + ): DocumentId[][] { + const allNodes = new Set(); + for (const id of edges.keys()) { + allNodes.add(id); + } + for (const id of skipped) { + allNodes.add(id); + } + + const visited = new Set(); + const componentOf = new Map(); + const sccs: DocumentId[][] = []; + + const edgeOf = (node: DocumentId): DocumentId | null => { + if (skipped.has(node)) { + return null; + } + return edges.get(node) ?? 
null; + }; + + for (const root of allNodes) { + if (visited.has(root)) { + continue; + } + + // Walk forward marking the path until we hit a visited node + // or a null. `pathIndex` lets us detect "did we land back on + // our own path". + const path: DocumentId[] = []; + const pathIndex = new Map(); + let cursor: DocumentId | null = root; + + while ( + cursor !== null && + !visited.has(cursor) && + !pathIndex.has(cursor) + ) { + pathIndex.set(cursor, path.length); + path.push(cursor); + cursor = edgeOf(cursor); + } + + // We stopped because either (a) cursor is null, (b) cursor + // is already visited (chain merges into an earlier-explored + // subgraph — every node on `path` is its own singleton + // component), or (c) cursor is on `path` itself — the + // suffix of `path` from `pathIndex.get(cursor)` onward is a + // cycle; the prefix is a tail of singletons. + let cycleStart = path.length; + if (cursor !== null) { + const idx = pathIndex.get(cursor); + if (idx !== undefined) { + cycleStart = idx; + } + } + + // Singletons in `path[0..cycleStart)`. Emit them in + // leaves-first order: the deepest (closest to the cycle or + // chain-end) is the leaf in the DAG of SCCs, so we emit + // from the END of the prefix backward to get topo order + // (children before parents). + for (let i = cycleStart - 1; i >= 0; i--) { + const node = path[i]; + visited.add(node); + const componentId = sccs.length; + componentOf.set(node, componentId); + sccs.push([node]); + } + // Cycle (if any). + if (cycleStart < path.length) { + const cycleNodes = path.slice(cycleStart); + const componentId = sccs.length; + for (const node of cycleNodes) { + visited.add(node); + componentOf.set(node, componentId); + } + sccs.push(cycleNodes); + } + } + + // The order produced above is mostly leaves-first per chain, + // but chains explored later may include singletons that merge + // into earlier-emitted components. Re-sort by (component points + // to anything? 
if so, target's component must come first). With + // a functional graph this is equivalent to emitting any node + // only after the node it points to. Do a final stable topo sort. + const componentTarget = new Map(); + for (let cid = 0; cid < sccs.length; cid++) { + // Pick a representative; in a functional-graph SCC, every + // node's edge points either inside the SCC (cycle) or to + // exactly one other SCC (singleton chain). For singletons + // the representative's edge gives us the parent component. + const [rep] = sccs[cid]; + const edge = edgeOf(rep); + if (edge === null) { + componentTarget.set(cid, null); + } else { + const targetCid = componentOf.get(edge); + if (targetCid === undefined || targetCid === cid) { + componentTarget.set(cid, null); + } else { + componentTarget.set(cid, targetCid); + } + } + } + + // Topo-sort: emit a component only after its target has been + // emitted. + const emitted = new Set(); + const ordered: DocumentId[][] = []; + const tryEmit = (cid: number, stack: Set): void => { + if (emitted.has(cid)) { + return; + } + if (stack.has(cid)) { + return; + } // shouldn't happen given functional-graph SCC contraction + stack.add(cid); + const target = componentTarget.get(cid) ?? null; + if (target !== null) { + tryEmit(target, stack); + } + stack.delete(cid); + if (!emitted.has(cid)) { + emitted.add(cid); + ordered.push(sccs[cid]); + } + }; + for (let cid = 0; cid < sccs.length; cid++) { + tryEmit(cid, new Set()); + } + + return ordered; + } + + private parseSwapMarker(bytes: Uint8Array): SwapMarker | undefined { + // Marker files are written by us (in `executeCycle`) and only + // consumed here on startup recovery; the shape is closed. Treat + // a parse failure as a corrupt marker. 
+ const parsed = tryParseSwapMarker(bytes); + if ( + parsed === undefined || + typeof parsed.uuid !== "string" || + !Array.isArray(parsed.legs) + ) { + return undefined; + } + return parsed; + } +} diff --git a/frontend/sync-client/src/sync-operations/sync-event-queue.test.ts b/frontend/sync-client/src/sync-operations/sync-event-queue.test.ts new file mode 100644 index 00000000..d2676011 --- /dev/null +++ b/frontend/sync-client/src/sync-operations/sync-event-queue.test.ts @@ -0,0 +1,907 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { + STORED_STATE_SCHEMA_VERSION, + SyncEventQueue +} from "./sync-event-queue"; +import { Settings } from "../persistence/settings"; +import { Logger } from "../tracing/logger"; +import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; +import { SyncEventType } from "./types"; +import type { DocumentRecord, RelativePath, StoredSyncState } from "./types"; + +interface QueueHarness { + queue: SyncEventQueue; + settings: Settings; + saveCalls: StoredSyncState[]; +} + +function createHarness( + options: { + ignorePatterns?: string[]; + initialState?: Partial; + omitSchemaVersion?: boolean; + } = {} +): QueueHarness { + const logger = new Logger(); + const settings = new Settings( + logger, + { ignorePatterns: options.ignorePatterns ?? [] }, + async () => { + /* no-op */ + } + ); + + const saveCalls: StoredSyncState[] = []; + const initialState: Partial | undefined = + options.initialState === undefined && options.omitSchemaVersion !== true + ? 
{ schemaVersion: STORED_STATE_SCHEMA_VERSION } + : options.initialState; + + const queue = new SyncEventQueue( + settings, + logger, + initialState, + async (data) => { + saveCalls.push(data); + } + ); + return { queue, settings, saveCalls }; +} + +function createQueue(ignorePatterns: string[] = []): SyncEventQueue { + return createHarness({ ignorePatterns }).queue; +} + +function fakeRemoteVersion( + documentId: string, + overrides: Partial = {} +): DocumentVersionWithoutContent { + return { + vaultUpdateId: 1, + documentId, + relativePath: `${documentId}.md`, + updatedDate: "2026-01-01", + isDeleted: false, + userId: "user", + deviceId: "device", + contentSize: 100, + isNewFile: true, + ...overrides + }; +} + +function fakeRecord( + documentId: string, + overrides: Partial = {} +): DocumentRecord { + const path = `${documentId.toLowerCase()}.md`; + return { + documentId, + parentVersionId: 1, + remoteHash: `hash-${documentId}`, + remoteRelativePath: path, + localPath: path, + ...overrides + }; +} + +describe("SyncEventQueue", () => { + it("returns enqueued events in FIFO order with no coalescing", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "c.md" }); + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + const first = await queue.next(); + assert.strictEqual(first?.type, SyncEventType.LocalCreate); + + const second = await queue.next(); + assert.strictEqual(second?.type, SyncEventType.LocalCreate); + + const third = await queue.next(); + assert.strictEqual(third?.type, SyncEventType.LocalDelete); + assert.strictEqual(third.documentId, "A"); + + assert.strictEqual(await queue.next(), undefined); + }); + + it("create events are returned FIFO", async () => { + const queue = createQueue(); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); 
+ await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + + const first = await queue.next(); + assert.strictEqual(first?.type, SyncEventType.LocalCreate); + assert.strictEqual(first.path, "a.md"); + + const second = await queue.next(); + assert.strictEqual(second?.type, SyncEventType.LocalCreate); + assert.strictEqual(second.path, "b.md"); + }); + + it("delete resolves documentId from path", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + const event = await queue.next(); + assert.strictEqual(event?.type, SyncEventType.LocalDelete); + assert.strictEqual(event.documentId, "A"); + }); + + it("delete for unknown path is silently ignored", async () => { + const queue = createQueue(); + await queue.enqueue({ + type: SyncEventType.LocalDelete, + path: "unknown.md" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + }); + + it("delete clears the localPath of the affected record", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + const record = queue.getDocumentByDocumentId("A"); + assert.ok(record !== undefined); + assert.strictEqual(record.localPath, undefined); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + }); + + it("document store CRUD operations work correctly", async () => { + const queue = createQueue(); + + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + assert.strictEqual(queue.syncedDocumentCount, 0); + + await queue.upsertRecord(fakeRecord("A")); + assert.strictEqual(queue.syncedDocumentCount, 1); + + const settled = queue.getRecordByLocalPath("a.md" as RelativePath); + assert.strictEqual(settled?.documentId, "A"); + assert.strictEqual(settled.localPath, "a.md"); + 
assert.strictEqual(settled.remoteRelativePath, "a.md"); + + const found = queue.getDocumentByDocumentId("A"); + assert.strictEqual(found?.localPath, "a.md"); + assert.strictEqual(found.documentId, "A"); + + await queue.removeDocumentById("A"); + assert.strictEqual(queue.syncedDocumentCount, 0); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + assert.strictEqual(queue.getDocumentByDocumentId("A"), undefined); + }); + + it("LocalUpdate with oldPath moves the document on disk", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + await queue.enqueue({ + type: SyncEventType.LocalUpdate, + path: "b.md", + oldPath: "a.md" + }); + + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + const moved = queue.getRecordByLocalPath("b.md" as RelativePath); + assert.strictEqual(moved?.documentId, "A"); + assert.strictEqual(moved.localPath, "b.md"); + + // The doc's remoteRelativePath is owned by the wire loop, not the + // watcher path — a local rename does not move the server-side path. + assert.strictEqual(moved.remoteRelativePath, "a.md"); + }); + + it("LocalUpdate rename onto a tracked slot enqueues a delete for the displaced doc", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + await queue.upsertRecord(fakeRecord("B")); + + // User renames a.md onto b.md, clobbering b.md on disk. + await queue.enqueue({ + type: SyncEventType.LocalUpdate, + path: "b.md", + oldPath: "a.md" + }); + + // Doc A now lives at b.md. + const aRecord = queue.getDocumentByDocumentId("A"); + assert.strictEqual(aRecord?.localPath, "b.md"); + const slot = queue.getRecordByLocalPath("b.md" as RelativePath); + assert.strictEqual(slot?.documentId, "A"); + + // Doc B has no local file anymore (its bytes were overwritten). 
+ const bRecord = queue.getDocumentByDocumentId("B"); + assert.strictEqual(bRecord?.localPath, undefined); + + // Two events should be queued: the LocalDelete for B, then the + // LocalUpdate for A (push order in `enqueue`). + assert.strictEqual(queue.pendingUpdateCount, 2); + + const first = await queue.next(); + assert.strictEqual(first?.type, SyncEventType.LocalDelete); + assert.strictEqual(first.documentId, "B"); + assert.strictEqual(first.path, "b.md"); + + const second = await queue.next(); + assert.strictEqual(second?.type, SyncEventType.LocalUpdate); + assert.strictEqual(second.documentId, "A"); + assert.strictEqual(second.path, "b.md"); + assert.strictEqual(second.isUserRename, true); + }); + + it("settled record owns a path over a stale pending create", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A", { localPath: "b.md" })); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + await queue.enqueue({ + type: SyncEventType.LocalUpdate, + path: "c.md", + oldPath: "b.md" + }); + + const aRecord = queue.getDocumentByDocumentId("A"); + assert.strictEqual(aRecord?.localPath, "c.md"); + assert.strictEqual( + queue.getRecordByLocalPath("b.md" as RelativePath), + undefined + ); + assert.strictEqual( + queue.getRecordByLocalPath("c.md" as RelativePath)?.documentId, + "A" + ); + + const create = await queue.next(); + assert.strictEqual(create?.type, SyncEventType.LocalCreate); + assert.strictEqual(create.path, "b.md"); + + const update = await queue.next(); + assert.strictEqual(update?.type, SyncEventType.LocalUpdate); + assert.strictEqual(update.documentId, "A"); + assert.strictEqual(update.path, "c.md"); + }); + + it("byLocalPath stays consistent across upsertRecord, setLocalPath, and rename", async () => { + const queue = createQueue(); + + await queue.upsertRecord(fakeRecord("A")); + assert.strictEqual(queue.byLocalPath.size, 1); + assert.strictEqual( + queue.byLocalPath.get("a.md" as 
RelativePath)?.documentId, + "A" + ); + + // upsertRecord on an existing record with a non-undefined + // localPath does NOT rewrite localPath. The watcher path and the + // reconciler are the only authorities on localPath of an + // already-placed record; letting the wire loop re-key here would + // race a user rename that landed during an HTTP roundtrip. + await queue.upsertRecord( + fakeRecord("A", { localPath: "renamed.md" as RelativePath }) + ); + assert.strictEqual(queue.byLocalPath.size, 1); + assert.strictEqual( + queue.byLocalPath.get("a.md" as RelativePath)?.documentId, + "A" + ); + assert.strictEqual( + queue.byLocalPath.get("renamed.md" as RelativePath), + undefined + ); + assert.strictEqual(queue.getDocumentByDocumentId("A")?.localPath, "a.md"); + + // setLocalPath does re-key — it's the explicit path-mutation API. + await queue.setLocalPath("A", "later.md" as RelativePath); + assert.strictEqual(queue.byLocalPath.size, 1); + assert.strictEqual( + queue.byLocalPath.get("a.md" as RelativePath), + undefined + ); + assert.strictEqual( + queue.byLocalPath.get("later.md" as RelativePath)?.documentId, + "A" + ); + + // setLocalPath to undefined should drop the entry. + await queue.setLocalPath("A", undefined); + assert.strictEqual(queue.byLocalPath.size, 0); + assert.strictEqual( + queue.byLocalPath.get("later.md" as RelativePath), + undefined + ); + + // The record is still tracked by docId. + assert.strictEqual( + queue.getDocumentByDocumentId("A")?.localPath, + undefined + ); + }); + + it("upsertRecord installs localPath only when the existing record has none (placement-pending → placed)", async () => { + const queue = createQueue(); + + // Same-docId-collapse shape: a placement-pending record (created + // earlier by a remote-create handler when the slot was occupied) + // gets resolved by a LocalCreate that returns the same docId. 
+ // The watcher hasn't touched localPath since the record is + // placement-pending, so installing the now-known path is correct. + await queue.upsertRecord(fakeRecord("A", { localPath: undefined })); + assert.strictEqual(queue.byLocalPath.size, 0); + + await queue.upsertRecord( + fakeRecord("A", { localPath: "fresh.md" as RelativePath }) + ); + assert.strictEqual(queue.byLocalPath.size, 1); + assert.strictEqual( + queue.byLocalPath.get("fresh.md" as RelativePath)?.documentId, + "A" + ); + assert.strictEqual( + queue.getDocumentByDocumentId("A")?.localPath, + "fresh.md" + ); + }); + + it("upsertRecord ignores stale localPath from the wire loop after a watcher rename", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + // Watcher renames a.md -> renamed.md while the wire loop is + // mid-roundtrip. The wire loop captured an earlier snapshot of + // localPath and now tries to write it back through upsertRecord. + await queue.enqueue({ + type: SyncEventType.LocalUpdate, + path: "renamed.md", + oldPath: "a.md" + }); + assert.strictEqual( + queue.getDocumentByDocumentId("A")?.localPath, + "renamed.md" + ); + + await queue.upsertRecord( + fakeRecord("A", { + parentVersionId: 2, + remoteRelativePath: "a.md", + remoteHash: "hash-A-v2", + localPath: "a.md" as RelativePath + }) + ); + + // The watcher's rename wins: localPath stays at renamed.md. 
+ const record = queue.getDocumentByDocumentId("A"); + assert.strictEqual(record?.localPath, "renamed.md"); + assert.strictEqual(record.parentVersionId, 2); + assert.strictEqual(record.remoteRelativePath, "a.md"); + assert.strictEqual(record.remoteHash, "hash-A-v2"); + assert.strictEqual( + queue.byLocalPath.get("renamed.md" as RelativePath)?.documentId, + "A" + ); + assert.strictEqual( + queue.byLocalPath.get("a.md" as RelativePath), + undefined + ); + }); + + it("create can be re-enqueued after being dequeued", async () => { + const queue = createQueue(); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + await queue.next(); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + assert.strictEqual(queue.pendingUpdateCount, 1); + }); + + it("silently ignores create events matching ignore patterns", async () => { + const queue = createQueue(["*.tmp", ".hidden/**"]); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "scratch.tmp" + }); + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: ".hidden/secret.md" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "notes-new.md" + }); + assert.strictEqual(queue.pendingUpdateCount, 1); + + await queue.enqueue({ + type: SyncEventType.RemoteChange, + remoteVersion: fakeRemoteVersion("N") + }); + assert.strictEqual(queue.pendingUpdateCount, 2); + }); + + it("addInternalIgnorePattern hides paths from enqueue and survives settings reload", async () => { + const harness = createHarness({ ignorePatterns: ["*.tmp"] }); + const { queue, settings } = harness; + + queue.addInternalIgnorePattern(".vaultlink/**"); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: ".vaultlink/swap" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + + // User-pattern matching still works alongside the internal pattern. 
+ await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "scratch.tmp" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + + // Settings reload must not forget the internal pattern. + await settings.setSettings({ ignorePatterns: ["*.bak"] }); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: ".vaultlink/another" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + + // The new user pattern took effect. + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "old.bak" + }); + assert.strictEqual(queue.pendingUpdateCount, 0); + + // And paths outside both pattern sets still pass through. + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "notes.md" + }); + assert.strictEqual(queue.pendingUpdateCount, 1); + }); + + it("clearPending removes events but keeps documents", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "c.md" }); + + assert.strictEqual(queue.pendingUpdateCount, 2); + + queue.clearPending(); + + assert.strictEqual(queue.pendingUpdateCount, 0); + assert.strictEqual(queue.syncedDocumentCount, 1); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath)?.documentId, + "A" + ); + }); + + it("allSettledDocuments returns all tracked documents that have a localPath", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + await queue.upsertRecord(fakeRecord("B")); + // A doc with no local file (e.g. a remote create whose slot was + // occupied) should not appear in the localPath-keyed view. 
+ await queue.upsertRecord(fakeRecord("C", { localPath: undefined })); + + const docs = queue.allSettledDocuments(); + assert.strictEqual(docs.size, 2); + const paths = Array.from(docs.keys()).sort(); + assert.deepStrictEqual(paths, ["a.md", "b.md"]); + }); + + it("loads initial state from persistence", () => { + const harness = createHarness({ + initialState: { + schemaVersion: STORED_STATE_SCHEMA_VERSION, + documents: [ + fakeRecord("A", { parentVersionId: 5 }), + fakeRecord("B", { parentVersionId: 3 }) + ], + lastSeenUpdateId: 4 + } + }); + const { queue } = harness; + + assert.strictEqual(queue.syncedDocumentCount, 2); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath)?.documentId, + "A" + ); + assert.strictEqual( + queue.getRecordByLocalPath("b.md" as RelativePath)?.documentId, + "B" + ); + assert.strictEqual(queue.lastSeenUpdateId, 4); + }); + + it("constructor with mismatched schema version wipes state and saves the new version", () => { + const harness = createHarness({ + initialState: { + schemaVersion: 0, + documents: [fakeRecord("A"), fakeRecord("B")], + lastSeenUpdateId: 7 + } + }); + + // Persisted documents and watermark were discarded. + assert.strictEqual(harness.queue.syncedDocumentCount, 0); + assert.strictEqual(harness.queue.lastSeenUpdateId, 0); + + // The constructor scheduled a save (don't await — fire-and-forget), + // but we synchronously enqueued it so it should have landed by now. + // The recorded save uses the current schema version. 
+ assert.ok(harness.saveCalls.length >= 1); + const last = harness.saveCalls[harness.saveCalls.length - 1]; + assert.strictEqual(last.schemaVersion, STORED_STATE_SCHEMA_VERSION); + assert.deepStrictEqual(last.documents, []); + assert.strictEqual(last.lastSeenUpdateId, 0); + }); + + it("constructor with missing schema version also wipes state", () => { + const harness = createHarness({ + initialState: { + documents: [fakeRecord("A")], + lastSeenUpdateId: 3 + } + }); + + assert.strictEqual(harness.queue.syncedDocumentCount, 0); + assert.strictEqual(harness.queue.lastSeenUpdateId, 0); + assert.ok(harness.saveCalls.length >= 1); + assert.strictEqual( + harness.saveCalls[harness.saveCalls.length - 1].schemaVersion, + STORED_STATE_SCHEMA_VERSION + ); + }); + + it("resolveCreate settles the document and resolves the create promise", async () => { + const queue = createQueue(); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + + const event = await queue.next(); // dequeue the create + assert.ok(event?.type === SyncEventType.LocalCreate); + const createPromise = event.resolvers.promise; + + await queue.resolveCreate( + event, + fakeRecord("DOC-1", { + parentVersionId: 5, + localPath: "a.md" as RelativePath, + remoteRelativePath: "a.md" as RelativePath + }) + ); + + // Document is now settled + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath)?.documentId, + "DOC-1" + ); + + // Promise was resolved + assert.strictEqual(await createPromise, "DOC-1"); + }); + + it("delete collapses a pending create that has not started processing", async () => { + const queue = createQueue(); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + const create = queue.peekFront(); + assert.ok(create?.type === SyncEventType.LocalCreate); + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + assert.strictEqual(queue.pendingUpdateCount, 0); + assert.strictEqual(await queue.next(), undefined); + 
await assert.rejects(create.resolvers.promise, /cancelled/); + }); + + it("resolveCreate does not claim a localPath after an in-flight pending create was deleted", async () => { + const queue = createQueue(); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + const create = queue.peekFront(); + assert.ok(create?.type === SyncEventType.LocalCreate); + create.isProcessing = true; + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + await queue.resolveCreate( + create, + fakeRecord("DOC-1", { + localPath: "a.md" as RelativePath, + remoteRelativePath: "a.md" as RelativePath + }) + ); + + assert.strictEqual( + queue.getDocumentByDocumentId("DOC-1")?.localPath, + undefined + ); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + + const deleteEvent = await queue.next(); + assert.strictEqual(deleteEvent?.type, SyncEventType.LocalDelete); + assert.strictEqual(deleteEvent.documentId, "DOC-1"); + }); + + it("resolveCreate only clears localPath for a pending delete of that path", async () => { + const queue = createQueue(); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "old.md" + }); + const create = queue.peekFront(); + assert.ok(create?.type === SyncEventType.LocalCreate); + create.isProcessing = true; + + await queue.enqueue({ + type: SyncEventType.LocalDelete, + path: "old.md" + }); + + await queue.resolveCreate( + create, + fakeRecord("DOC-1", { + localPath: "new.md" as RelativePath, + remoteRelativePath: "new.md" as RelativePath + }) + ); + + assert.strictEqual( + queue.getDocumentByDocumentId("DOC-1")?.localPath, + "new.md" + ); + assert.strictEqual( + queue.getRecordByLocalPath("new.md" as RelativePath)?.documentId, + "DOC-1" + ); + + const deleteEvent = await queue.next(); + assert.strictEqual(deleteEvent?.type, SyncEventType.LocalDelete); + assert.strictEqual(deleteEvent.documentId, "DOC-1"); + assert.strictEqual(deleteEvent.path, "old.md"); + }); 
+ + it("pending create owns a same-path delete over a stale deleting record", async () => { + const queue = createQueue(); + await queue.upsertRecord( + fakeRecord("OLD", { localPath: "a.md" as RelativePath }) + ); + queue.markServerDeletePending("OLD"); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + const create = queue.peekFront(); + assert.ok(create?.type === SyncEventType.LocalCreate); + create.isProcessing = true; + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + + assert.strictEqual( + queue.getDocumentByDocumentId("OLD")?.localPath, + undefined + ); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath), + undefined + ); + + const createEvent = await queue.next(); + assert.strictEqual(createEvent, create); + + const deleteEvent = await queue.next(); + assert.strictEqual(deleteEvent?.type, SyncEventType.LocalDelete); + assert.strictEqual(deleteEvent.documentId, create.resolvers.promise); + }); + + it("rename of a queued create drains same-path deletes first", async () => { + const queue = createQueue(); + await queue.upsertRecord( + fakeRecord("OLD", { localPath: "target.md" as RelativePath }) + ); + + await queue.enqueue({ + type: SyncEventType.LocalCreate, + path: "source.md" + }); + const create = queue.peekFront(); + assert.ok(create?.type === SyncEventType.LocalCreate); + + await queue.enqueue({ + type: SyncEventType.LocalDelete, + path: "target.md" + }); + await queue.enqueue({ + type: SyncEventType.LocalUpdate, + oldPath: "source.md", + path: "target.md" + }); + + const deleteEvent = await queue.next(); + assert.strictEqual(deleteEvent?.type, SyncEventType.LocalDelete); + assert.strictEqual(deleteEvent.documentId, "OLD"); + assert.strictEqual(deleteEvent.path, "target.md"); + + const createEvent = await queue.next(); + assert.strictEqual(createEvent, create); + assert.strictEqual(createEvent.path, "target.md"); + + const updateEvent = await queue.next(); + 
assert.strictEqual(updateEvent?.type, SyncEventType.LocalUpdate); + assert.strictEqual(updateEvent.documentId, create.resolvers.promise); + assert.strictEqual(updateEvent.path, "target.md"); + }); + + it("findLatestCreateForPath returns the pending create", async () => { + const queue = createQueue(); + + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "a.md" }); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + + const found = queue.findLatestCreateForPath("a.md" as RelativePath); + assert.ok(found !== undefined); + assert.strictEqual(found.path, "a.md"); + + const missing = queue.findLatestCreateForPath("c.md" as RelativePath); + assert.strictEqual(missing, undefined); + }); + + it("hasPendingEventsForPath reflects pending events", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + assert.strictEqual( + queue.hasPendingEventsForPath("a.md" as RelativePath), + false + ); + + await queue.enqueue({ type: SyncEventType.LocalDelete, path: "a.md" }); + // After a delete the localPath is cleared; an unknown path is treated + // as "must be pending creation", so this still returns true. + assert.strictEqual( + queue.hasPendingEventsForPath("a.md" as RelativePath), + true + ); + }); + + it("setLocalPath displaces a previous holder of the same path", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + await queue.upsertRecord( + fakeRecord("B", { localPath: "b.md" as RelativePath }) + ); + + // Move B onto a.md — the slot already held by A. The invariant + // requires A's localPath to be cleared (placement-pending), + // and byLocalPath["a.md"] === B. 
+ await queue.setLocalPath("B", "a.md" as RelativePath); + + const a = queue.getDocumentByDocumentId("A"); + const b = queue.getDocumentByDocumentId("B"); + assert.strictEqual(a?.localPath, undefined); + assert.strictEqual(b?.localPath, "a.md"); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath)?.documentId, + "B" + ); + // B's old slot is now empty — nothing else moved into it. + assert.strictEqual( + queue.getRecordByLocalPath("b.md" as RelativePath), + undefined + ); + }); + + it("upsertRecord displaces a previous holder of the same path", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + + // A new record (different docId) claims a.md. The prior holder + // (A) must be displaced — its localPath cleared, and + // byLocalPath["a.md"] now points at the new record. + await queue.upsertRecord( + fakeRecord("B", { localPath: "a.md" as RelativePath }) + ); + + const a = queue.getDocumentByDocumentId("A"); + const b = queue.getDocumentByDocumentId("B"); + assert.strictEqual(a?.localPath, undefined); + assert.strictEqual(b?.localPath, "a.md"); + assert.strictEqual( + queue.getRecordByLocalPath("a.md" as RelativePath)?.documentId, + "B" + ); + }); + + it("the localPath/byLocalPath invariant holds across rename + recreate cycles", async () => { + // Construct the exact same-path create cycle that produces the + // bug-D race: docA at P, then docB created at P (via + // upsertRecord), and finally a setLocalPath that would move a + // third doc onto P. The invariant must hold at every step: + // exactly one record has localPath===P at any given time, and + // byLocalPath.get(P) returns it. + const queue = createQueue(); + + const path = "p.md" as RelativePath; + + await queue.upsertRecord( + fakeRecord("A", { localPath: path, remoteRelativePath: path }) + ); + + // Sanity: A holds the slot. 
+ assert.strictEqual(queue.getRecordByLocalPath(path)?.documentId, "A"); + assert.strictEqual(queue.getDocumentByDocumentId("A")?.localPath, path); + + // docB created at P via upsertRecord (e.g. a remote create + // that races A's local file onto the same slot). A must be + // displaced. + await queue.upsertRecord( + fakeRecord("B", { localPath: path, remoteRelativePath: path }) + ); + assert.strictEqual( + queue.getDocumentByDocumentId("A")?.localPath, + undefined + ); + assert.strictEqual(queue.getDocumentByDocumentId("B")?.localPath, path); + assert.strictEqual(queue.getRecordByLocalPath(path)?.documentId, "B"); + + // Now setLocalPath moves a third doc C onto P. B must in turn + // be displaced; the invariant still holds. + await queue.upsertRecord( + fakeRecord("C", { localPath: "c.md" as RelativePath }) + ); + await queue.setLocalPath("C", path); + assert.strictEqual( + queue.getDocumentByDocumentId("B")?.localPath, + undefined + ); + assert.strictEqual(queue.getDocumentByDocumentId("C")?.localPath, path); + assert.strictEqual(queue.getRecordByLocalPath(path)?.documentId, "C"); + + // Across the whole cycle exactly one record holds the slot. 
+ const holders = Array.from(queue.allRecords()).filter( + (r) => r.localPath === path + ); + assert.strictEqual(holders.length, 1); + assert.strictEqual(holders[0].documentId, "C"); + }); + + it("clearAllState clears everything", async () => { + const queue = createQueue(); + await queue.upsertRecord(fakeRecord("A")); + await queue.enqueue({ type: SyncEventType.LocalCreate, path: "b.md" }); + + await queue.clearAllState(); + + assert.strictEqual(queue.syncedDocumentCount, 0); + assert.strictEqual(queue.pendingUpdateCount, 0); + assert.strictEqual(queue.byLocalPath.size, 0); + }); +}); diff --git a/frontend/sync-client/src/sync-operations/sync-event-queue.ts b/frontend/sync-client/src/sync-operations/sync-event-queue.ts new file mode 100644 index 00000000..75f675d0 --- /dev/null +++ b/frontend/sync-client/src/sync-operations/sync-event-queue.ts @@ -0,0 +1,1000 @@ +import type { Settings } from "../persistence/settings"; +import type { Logger } from "../tracing/logger"; +import { globsToRegexes } from "../utils/globs-to-regexes"; +import { removeFromArray } from "../utils/remove-from-array"; +import { EventListeners } from "../utils/data-structures/event-listeners"; +import { + SyncEventType, + type DocumentId, + type DocumentRecord, + type FileSyncEvent, + type RelativePath, + type StoredSyncState, + type SyncEvent, + type VaultUpdateId +} from "./types"; +import { MinCovered } from "../utils/data-structures/min-covered"; + +export const STORED_STATE_SCHEMA_VERSION = 2; + +export class SyncEventQueue { + // Fires synchronously whenever the events array length changes (push, pop, + // remove, bulk-clear). The Syncer mirrors this into its public count + // listener; without this hook, listeners only saw deltas at consume time + // and missed the "queue grew" / "queue cleared on reset" transitions. 
public readonly onPendingUpdateCountChanged = new EventListeners<
    (count: number) => unknown
>();

// Fires whenever a record's `localPath` transitions to a different
// value. Subscribers see every disk-side path change — watcher-
// driven user renames, post-create deconflicts placed by the
// reconciler, lost-rename replays in offline-scan, displacements
// when another record claims a slot. Useful for callers that
// mirror disk-side state (e.g. test harnesses that maintain a
// "do-not-touch" list keyed by current path). Both `oldPath` and
// `newPath` may be `undefined` (placement-pending state).
public readonly onDocumentPathChanged = new EventListeners<
    (
        documentId: DocumentId,
        oldPath: RelativePath | undefined,
        newPath: RelativePath | undefined
    ) => unknown
>();

// Tracker behind the `lastSeenUpdateId` accessors. Assigned exactly once,
// in the constructor, from the persisted watermark (or 0).
private readonly _lastSeenUpdateId: MinCovered;

// Primary index of every settled document, keyed by docId. The wire loop
// (records ↔ server) updates `remoteRelativePath` here as the server
// assigns/relocates a doc; the Reconciler (records ↔ disk) updates
// `localPath` here as it places files on disk.
//
// The type arguments are spelled out so lookups yield
// `DocumentRecord | undefined` instead of `any`.
private readonly byDocId = new Map<DocumentId, DocumentRecord>();

// Derived index from `localPath -> record`. Maintained alongside every
// mutation that touches `localPath` so callers (the watcher path through
// `enqueue`, the Reconciler) get O(1) lookups by disk location. Only
// contains records whose `localPath !== undefined`.
private readonly _byLocalPath = new Map<RelativePath, DocumentRecord>();

// All outstanding operations in order of occurrence,
// can include multiple generations of the same document,
// e.g.: a create, delete, create sequence for the same path.
//
// The paths within the events must always correspond to the latest
// path on disk, so the path of each event may be updated multiple
// times.
//
// It maps pending changes onto the local filesystem.
private readonly events: SyncEvent[] = [];

// Events of any type (local create/update/delete and remote changes) whose
// path matches one of these patterns are dropped by `enqueue`, because the
// user explicitly told us to ignore them. Also contains the internal
// patterns below, so there is a single array to match against.
private userIgnorePatterns: RegExp[];

// Hard-coded ignores that callers (e.g. the Syncer for `.vaultlink/**`
// swap-marker files) pin via `addInternalIgnorePattern`. Folded into
// `userIgnorePatterns` so the existing match path doesn't need to know
// about two arrays. Stored separately so a later `onSettingsChanged`
// event that re-derives `userIgnorePatterns` from settings doesn't
// forget the internal patterns.
private readonly internalIgnorePatterns: RegExp[] = [];

// DocIds whose HTTP DELETE has been acked by the server but whose
// WebSocket-receipt-driven `removeDocumentById` hasn't run yet (the
// record is still in `byDocId` because the wire loop keeps it around to
// recognise late remote updates as "file is missing"). The Reconciler
// and the remote-update wire-loop handlers consult this set to skip any
// work that would resurrect the doc — without it, a placement-pending
// record (`localPath === undefined` after the LocalDelete enqueue) would
// be re-fetched from the server and written back to disk, or a late
// RemoteChange for the same doc would stash the pre-delete bytes into
// `pendingPlacementContent` for the Reconciler to "place".
//
// Cleared as a side effect of `removeDocumentById`. Also cleared on
// `clearAllState` / schema-version-mismatch reset.
private readonly _pendingServerDeletes = new Set<DocumentId>();

/**
 * @param settings Source of the user's `ignorePatterns`; the queue also
 * subscribes to `onSettingsChanged` to recompile them.
 * @param logger Destination for diagnostics.
 * @param initialState Previously persisted state, possibly partial or
 * absent. Discarded entirely when its `schemaVersion` differs from
 * `STORED_STATE_SCHEMA_VERSION`.
 * @param saveData Persistence callback. It is invoked synchronously from
 * the constructor (without awaiting) when the persisted state is discarded.
 */
public constructor(
    private readonly settings: Settings,
    private readonly logger: Logger,
    initialState: Partial<StoredSyncState> | undefined,
    private readonly saveData: (data: StoredSyncState) => Promise<unknown>
) {
    this.userIgnorePatterns = globsToRegexes(
        this.settings.getSettings().ignorePatterns,
        this.logger
    );

    // Re-derive the combined pattern list on every settings change, always
    // re-appending the pinned internal patterns so they are never lost.
    this.settings.onSettingsChanged.add((newSettings) => {
        this.userIgnorePatterns = [
            ...globsToRegexes(newSettings.ignorePatterns, this.logger),
            ...this.internalIgnorePatterns
        ];
    });

    initialState ??= {};

    const persistedSchemaVersion = initialState.schemaVersion;
    if (persistedSchemaVersion !== STORED_STATE_SCHEMA_VERSION) {
        this.logger.info(
            `Persisted state schema version is ${persistedSchemaVersion ?? "unset"}, expected ${STORED_STATE_SCHEMA_VERSION}; discarding persisted documents and watermark so the offline scan re-derives state from disk`
        );
        initialState = {};
        // Schedule a save so the new schema version sticks even if the user
        // never makes a change. Don't await here (constructor is sync); the
        // first real save in `save()` will pin it down anyway. The call
        // itself still happens synchronously; only the rejection handler is
        // attached, so a failing persist is logged instead of surfacing as
        // an unhandled promise rejection.
        void this.saveData({
            schemaVersion: STORED_STATE_SCHEMA_VERSION,
            documents: [],
            lastSeenUpdateId: 0
        }).catch((error: unknown) => {
            this.logger.warn(
                `Failed to persist the schema version reset: ${String(error)}`
            );
        });
    }

    if (initialState.documents !== undefined) {
        for (const record of initialState.documents) {
            this.byDocId.set(record.documentId, record);
            if (record.localPath !== undefined) {
                // Defensive: if two persisted records share the same
                // localPath (shouldn't happen given the invariant
                // enforced at every mutation point, but persisted
                // state from older buggy versions could violate it),
                // displace the prior holder so we don't end up with
                // a shadowed record on load.
                const displaced = this._byLocalPath.get(record.localPath);
                if (displaced !== undefined && displaced !== record) {
                    displaced.localPath = undefined;
                    this.logger.warn(
                        `Persisted state had two records sharing localPath ` +
                            `${record.localPath} (${displaced.documentId} and ` +
                            `${record.documentId}); clearing the prior holder's ` +
                            `localPath so the reconciler re-places it`
                    );
                }
                this._byLocalPath.set(record.localPath, record);
            }
        }
    }
    this._lastSeenUpdateId = new MinCovered(
        initialState.lastSeenUpdateId ?? 0
    );

    this.logger.debug(
        `Loaded ${this.byDocId.size} documents and lastSeenUpdateId=${this._lastSeenUpdateId.min} from storage`
    );
}

/** Number of events currently held in the queue. */
public get pendingUpdateCount(): number {
    return this.events.length;
}

/** Number of records in the docId index, with or without a `localPath`. */
public get syncedDocumentCount(): number {
    return this.byDocId.size;
}

/**
 * Read-only view of the `localPath -> record` index. Use for O(1) lookups
 * by disk location; the index is maintained by every mutation that
 * touches `localPath` (`upsertRecord`, `setLocalPath`, the rename branch
 * of `enqueue`, `removeDocumentById`).
 */
public get byLocalPath(): ReadonlyMap<RelativePath, DocumentRecord> {
    return this._byLocalPath;
}

/** The watermark reported by the underlying `MinCovered` tracker (`min`). */
public get lastSeenUpdateId(): VaultUpdateId {
    return this._lastSeenUpdateId.min;
}

/**
 * Feeds `id` into the `MinCovered` tracker via `add`. This is not a plain
 * assignment: the getter returns the tracker's `min`, so reading the
 * property back may yield a smaller value than the one just set.
 */
public set lastSeenUpdateId(id: VaultUpdateId) {
    this._lastSeenUpdateId.add(id);
}

/**
 * Watermark to send with our own `POST /documents` requests.
 *
 * The contiguous-prefix `lastSeenUpdateId` lags behind reality whenever
 * there are gaps in the vuid stream we've observed: if the server has
 * committed vuids 1..N from various clients but we've only processed
 * a non-contiguous subset, `min` stays at the last hole. The server's
 * create handler reads this watermark to decide whether to merge a
 * new POST into an existing doc at the same path:
 *
 *   creation_vault_update_id > last_seen_vault_update_id  →  merge
 *
 * That check is meant to fire only for docs the client genuinely
 * couldn't have known about. But on a same-device "rename a
 * pending-create away then create something else at that path" race,
 * the second POST went out with `last_seen = min` while we already
 * held a record for the first create at vuid=N — and the server
 * happily merged the second create into our own doc, aliasing two
 * physically distinct local files onto a single docId.
 *
 * The fix is path-scoped: if we already track a doc whose
 * `remoteRelativePath` matches the path we're about to POST, the
 * server's existing doc at that path is exactly the one we'd alias
 * into. Bumping `last_seen` to that record's `parentVersionId`
 * forces the server's `creation_vuid > last_seen` check to fail and
 * fall through to the deconflict path. For paths we don't yet
 * track, we send the regular `min` watermark — so a legitimate
 * cross-device merge (two clients independently creating the same
 * path) still fires when neither side holds a record for the
 * collision target.
 *
 * @param requestPath Remote path the upcoming create will be POSTed to.
 * @returns The larger of the regular watermark and the highest
 * `parentVersionId` among tracked records whose `remoteRelativePath`
 * equals `requestPath`.
 */
public lastSeenUpdateIdForCreate(
    requestPath: RelativePath
): VaultUpdateId {
    let watermark = this._lastSeenUpdateId.min;
    for (const record of this.byDocId.values()) {
        if (
            record.remoteRelativePath === requestPath &&
            record.parentVersionId > watermark
        ) {
            watermark = record.parentVersionId;
        }
    }
    return watermark;
}

/**
 * Pin an additional ignore pattern that survives setting reloads. Used
 * by the Syncer to hide internal scratch paths (e.g. `.vaultlink/**`
 * swap markers written by the Reconciler) from the watcher-driven
 * enqueue path. The pattern is compiled with the same `globsToRegexes`
 * used for user-configurable ignores; matching uses the existing
 * userIgnorePatterns array so there's only one match path.
 */
public addInternalIgnorePattern(pattern: string): void {
    const compiled = globsToRegexes([pattern], this.logger);
    this.internalIgnorePatterns.push(...compiled);
    this.userIgnorePatterns.push(...compiled);
}

/**
 * Add a watcher- or server-originated event to the queue.
 *
 * Events whose path matches an ignore pattern are dropped. Remote changes
 * and local creates are appended as-is. Local updates and deletes are
 * first resolved to the document they refer to: either a settled record at
 * the (old) path or a still-pending create for it. They are dropped
 * silently when neither exists.
 *
 * Ordering contract: every `events.push` happens before the first `await`
 * in its branch, because the caller starts draining right after `enqueue`
 * is invoked and only sees what is already in `events`.
 */
public async enqueue(input: FileSyncEvent): Promise<void> {
    const path =
        input.type === SyncEventType.RemoteChange
            ? input.remoteVersion.relativePath
            : input.path;

    if (this.userIgnorePatterns.some((pattern) => pattern.test(path))) {
        this.logger.info(
            `Ignoring ${input.type} for ${path} as it matches ignore patterns`
        );
        return;
    }

    if (input.type === SyncEventType.RemoteChange) {
        this.events.push(input);
        this.notifyPendingUpdateCountChanged();
        return;
    }

    if (input.type === SyncEventType.LocalCreate) {
        this.events.push({
            type: SyncEventType.LocalCreate,
            path,
            isProcessing: false,
            resolvers: Promise.withResolvers<DocumentId>()
        });
        this.notifyPendingUpdateCountChanged();
        return;
    }

    // For a rename the document is still indexed under its old path.
    const lookupPath =
        input.type === SyncEventType.LocalUpdate &&
        input.oldPath !== undefined
            ? input.oldPath
            : path;
    const record = this._byLocalPath.get(lookupPath);

    // If a settled record and a pending create both claim this path, the
    // settled record owns the current disk slot, unless the record is
    // already being deleted. A deleting record can briefly remain in the
    // localPath index when a create/delete pair was queued while the
    // create was pending; it must not steal the next same-path create's
    // delete/update.
    const pendingCreate = this.findLatestCreateForPath(lookupPath);
    const pendingDocumentId: Promise<DocumentId> | undefined =
        pendingCreate?.resolvers.promise;

    const recordIsDeleting =
        record !== undefined &&
        (this.hasPendingLocalDeleteForDocumentId(record.documentId) ||
            this.hasPendingServerDelete(record.documentId));
    const recordOwnsLookupPath =
        record !== undefined &&
        !(recordIsDeleting && pendingDocumentId !== undefined);

    const documentId: DocumentId | undefined = recordOwnsLookupPath
        ? record.documentId
        : undefined;

    const effectiveDocumentId:
        | Promise<DocumentId>
        | DocumentId
        | undefined = documentId ?? pendingDocumentId;
    if (effectiveDocumentId === undefined) {
        // we can get here when deleting a local document after a remote update
        return;
    }

    if (input.type === SyncEventType.LocalDelete) {
        if (
            documentId === undefined &&
            pendingCreate !== undefined &&
            !pendingCreate.isProcessing
        ) {
            this.cancelPendingCreate(pendingCreate);
            if (recordIsDeleting && record !== undefined) {
                // A stale deleting record was still claiming this path.
                // The not-yet-started create/delete pair collapsed to
                // nothing, and the disk file is gone, so clear the stale
                // claim too.
                await this.setLocalPath(record.documentId, undefined);
            }
            return;
        }

        // Push BEFORE awaiting `setLocalPath` (and its inner `save()`).
        // See the comment below on the synchronicity contract with
        // `ensureDraining()`.
        this.events.push({
            type: SyncEventType.LocalDelete,
            documentId: effectiveDocumentId,
            path: lookupPath
        });
        this.notifyPendingUpdateCountChanged();
        if (record !== undefined) {
            // The file is gone from disk, so whichever record is indexed
            // at this path must release it. That is either the owning
            // record (so the Reconciler doesn't try to operate on a
            // vacated slot) or a stale deleting record that was still
            // claiming the path while a newer pending create owned the
            // actual disk file. A defined record that does not own the
            // path is necessarily a deleting one, so both cases reduce
            // to this single check.
            await this.setLocalPath(record.documentId, undefined);
        }
        return;
    }

    const isUserRename = input.oldPath !== undefined;
    let needsSave = false;
    if (input.oldPath !== undefined) {
        if (!recordOwnsLookupPath && pendingDocumentId !== undefined) {
            this.updatePendingCreatePath(input.oldPath, path);
        } else {
            if (record === undefined || !recordOwnsLookupPath) {
                throw new Error(
                    "Unreachable: record must be defined for non-pending update"
                );
            }
            // The user renamed `oldPath` onto `path`. If `path` was
            // already tracked by a *different* doc (the OS rename
            // overwrote that file), that doc effectively no longer
            // exists locally — its content was clobbered. Without
            // explicitly recording the loss the doc would silently
            // drop out of the byLocalPath index below and we'd skip
            // notifying the server, leaving a phantom on the remote
            // that other agents still see. Enqueue a LocalDelete for
            // it so the server learns about the deletion.
            const displacedRecord = this._byLocalPath.get(path);
            if (
                displacedRecord !== undefined &&
                displacedRecord.documentId !== record.documentId
            ) {
                this.events.push({
                    type: SyncEventType.LocalDelete,
                    documentId: displacedRecord.documentId,
                    // Snapshot the path; once we move `record` onto
                    // `path` below the displaced doc will no longer
                    // resolve via `byLocalPath`.
                    path
                });
                // Drop the displaced doc's localPath: its file on
                // disk is gone (overwritten by the rename).
                // Mutate synchronously so the byLocalPath index is
                // correct before we move `record` onto the same
                // slot below; the persist runs in the trailing
                // `save()` so we don't await before pushing the
                // LocalUpdate (synchronicity contract).
                this.mutateLocalPathInPlace(displacedRecord, undefined);
                needsSave = true;
            }
            // Move record's localPath onto the new slot. We mutate
            // the record in place rather than re-creating it so any
            // held reference (drain handlers, queued events) sees
            // the new path on its next read.
            this.mutateLocalPathInPlace(record, path);
            // Retarget any queued LocalUpdates for this doc onto
            // the new path. The queue's invariant — and what
            // `skipIfOversized` and the watcher dedup checks bake
            // in — is that `event.path` always points at the doc's
            // current disk location.
            for (const e of this.events) {
                if (
                    e.type === SyncEventType.LocalUpdate &&
                    e.documentId === record.documentId
                ) {
                    e.path = path;
                }
            }
            needsSave = true;
        }
    }

    // Push BEFORE awaiting `save()`. The synchronicity contract is:
    // `Syncer.ensureDraining()` runs immediately after each `enqueue`,
    // and the drain only sees what's in `events[]`. Pushing after an
    // await would let the drain start, see an empty queue, exit, and
    // leave the event stranded.
    this.events.push({
        type: SyncEventType.LocalUpdate,
        documentId: effectiveDocumentId,
        path,
        originalPath: path,
        isUserRename
    });
    this.notifyPendingUpdateCountChanged();

    if (needsSave) {
        await this.save();
    }
}

/**
 * Remove and return the event at the front of the queue, or `undefined`
 * when the queue is empty. Count listeners are notified only when an
 * event was actually removed.
 */
public async next(): Promise<SyncEvent | undefined> {
    const event = this.events.shift();
    if (event !== undefined) {
        this.notifyPendingUpdateCountChanged();
    }
    return event;
}

/**
 * Return the next event without removing it. Drain uses this so the
 * event stays visible in the queue while it is being processed —
 * critical for `findLatestCreateForPath` to update an in-flight
 * `LocalCreate`'s local read path when a rename arrives mid-process.
 */
public peekFront(): SyncEvent | undefined {
    return this.events[0];
}

/**
 * Remove a specific event after `peekFront`-based processing is done.
 * Idempotent — safe to call when the event was already taken out by
 * `resolveCreate` (which clears a same-path pending create that a
 * remote-create handler just absorbed).
 */
public consumeEvent(event: SyncEvent): void {
    if (removeFromArray(this.events, event)) {
        this.notifyPendingUpdateCountChanged();
    }
}

/**
 * Call once a create has been acknowledged by the server.
 *
 * Queued `LocalUpdate` / `LocalDelete` events that were pushed while
 * this create was still in-flight carry the create's `resolvers.promise`
 * as their `documentId` (see the `pendingDocumentId` branch of
 * `enqueue`). We must rewrite those references to the resolved string
 * id *before* calling `upsertRecord`, otherwise its event-rewrite loop
 * (which compares `e.documentId === record.documentId`) would silently
 * skip them — leaving their `event.path` pointing at the pre-rename
 * slot and causing the next drain step's `getFileSize(event.path)` to
 * throw `FileNotFoundError`, dropping the user's intent.
 *
 * @param event The pending create being resolved; removed from the queue
 * if it is still there.
 * @param record The record to install for the created document. Its
 * `localPath` is not claimed when a `LocalDelete` for this document and
 * path is already queued.
 */
public async resolveCreate(
    event: Extract<SyncEvent, { type: SyncEventType.LocalCreate }>,
    record: DocumentRecord
): Promise<void> {
    if (removeFromArray(this.events, event)) {
        this.notifyPendingUpdateCountChanged();
    }
    this.replacePendingDocumentId(
        event.resolvers.promise,
        record.documentId
    );
    // Must run after `replacePendingDocumentId`: only then do queued
    // deletes carry the string id that this lookup is keyed on.
    const localPath = this.hasPendingLocalDeleteForDocumentId(
        record.documentId,
        record.localPath
    )
        ? undefined
        : record.localPath;
    await this.upsertRecord({ ...record, localPath });
    event.resolvers.resolve(record.documentId);
}

/**
 * Swap a pending create's `Promise` reference for the
 * resolved string id across every queued `LocalUpdate` / `LocalDelete`.
 * Call this whenever a create resolves (regular ack OR
 * displacement-merge into an existing doc) — see `resolveCreate` for
 * the failure mode if it's skipped.
 */
public replacePendingDocumentId(
    promise: Promise<DocumentId>,
    documentId: DocumentId
): void {
    for (const e of this.events) {
        if (
            (e.type === SyncEventType.LocalUpdate ||
                e.type === SyncEventType.LocalDelete) &&
            e.documentId === promise
        ) {
            e.documentId = documentId;
        }
    }
}

/**
 * Insert or merge a document record by `documentId`. When a record with
 * the same docId already exists it is mutated in place so any held
 * references (drain handlers, queued events) keep seeing the up-to-date
 * fields on their next read — this stays load-bearing for the Syncer's
 * drain handlers, which await across HTTP roundtrips.
 *
 * For an existing record this updates the wire fields
 * (`parentVersionId`, `remoteHash`, `remoteRelativePath`) and, only
 * when the existing record has no local file yet
 * (`localPath === undefined`), installs the supplied `localPath`. A
 * non-undefined existing localPath is owned by the watcher path and
 * the Reconciler — overwriting it from the wire loop would race a
 * user rename that landed during an HTTP roundtrip and silently
 * resurrect a stale slot.
 */
public async upsertRecord(record: DocumentRecord): Promise<void> {
    const existing = this.byDocId.get(record.documentId);
    if (existing === undefined) {
        // Store a copy so later in-place mutations never touch the
        // caller's object.
        const target: DocumentRecord = { ...record };
        this.byDocId.set(record.documentId, target);
        if (target.localPath !== undefined) {
            // Route through `mutateLocalPathInPlace` so the
            // localPath/byLocalPath invariant is upheld: if another
            // record already holds this slot, displace it (clear
            // its localPath) before installing `target`. Otherwise
            // we'd leave the displaced record shadowed (its
            // `localPath` still points at a slot that no longer
            // belongs to it), which the Reconciler would then
            // "rescue" by reading/renaming the file at that path
            // — but that file belongs to `target` now, causing
            // data loss.
            target.localPath = undefined;
            this.mutateLocalPathInPlace(target, record.localPath);
        }
    } else {
        existing.parentVersionId = record.parentVersionId;
        existing.remoteHash = record.remoteHash;
        existing.remoteRelativePath = record.remoteRelativePath;
        if (
            existing.localPath === undefined &&
            record.localPath !== undefined
        ) {
            return this.setLocalPath(record.documentId, record.localPath);
        }
    }
    return this.save();
}

/**
 * Update the `localPath` of an already-tracked record (by docId) and
 * re-key the `byLocalPath` index. Called by both the watcher path
 * (through `enqueue`) and the Reconciler.
 *
 * Pass `undefined` to mark the doc as "no local file" — the Reconciler
 * will place a file later (e.g. a remote create whose
 * `remoteRelativePath` slot is occupied at receive time).
+ */ + public async setLocalPath( + documentId: DocumentId, + newLocalPath: RelativePath | undefined + ): Promise { + const record = this.byDocId.get(documentId); + if (record === undefined) { + return; + } + this.mutateLocalPathInPlace(record, newLocalPath); + return this.save(); + } + + public async removeDocumentById(documentId: DocumentId): Promise { + const record = this.byDocId.get(documentId); + if (record === undefined) { + // Still clear any deletion-pending mark and purge stale + // RemoteChange events so a never-tracked doc doesn't accumulate + // entries. + this._pendingServerDeletes.delete(documentId); + this.purgeRemoteChangesForDocumentId(documentId); + return; + } + if ( + record.localPath !== undefined && + this._byLocalPath.get(record.localPath) === record + ) { + this._byLocalPath.delete(record.localPath); + } + this.byDocId.delete(documentId); + this._pendingServerDeletes.delete(documentId); + // Drop any pending RemoteChange events for this doc. A common case: + // a catch-up RemoteChange for the doc was deferred indefinitely + // while the user's LocalDelete (and any LocalUpdate behind it) sat + // in the queue ahead of it. Once those drain and the doc is + // removed, a still-pending RemoteChange for an earlier version + // would be processed by `processRemoteCreateForNewDocument` (the + // doc is now untracked, and catch-up's `isNewFile=true` semantics + // qualify it as a fresh create), resurrecting the doc on disk + // with stale bytes that disagree with every other agent. + this.purgeRemoteChangesForDocumentId(documentId); + return this.save(); + } + + /** + * Mark a doc as "HTTP DELETE has been acked by the server but the + * WebSocket receipt that would call `removeDocumentById` hasn't arrived + * yet". The Reconciler and remote-update wire-loop handlers consult + * `hasPendingServerDelete` to skip any work that would resurrect the + * doc. Cleared automatically by `removeDocumentById`. 
+ */ + public markServerDeletePending(documentId: DocumentId): void { + this._pendingServerDeletes.add(documentId); + } + + public hasPendingServerDelete(documentId: DocumentId): boolean { + return this._pendingServerDeletes.has(documentId); + } + + public getDocumentByDocumentId( + target: DocumentId + ): DocumentRecord | undefined { + return this.byDocId.get(target); + } + + public getDocumentByDocumentIdOrFail(target: DocumentId): DocumentRecord { + const result = this.getDocumentByDocumentId(target); + if (!result) { + throw new Error(`No document found with id ${target}`); + } + return result; + } + + public getRecordByLocalPath( + path: RelativePath + ): DocumentRecord | undefined { + return this._byLocalPath.get(path); + } + + public async save(): Promise { + return this.saveData({ + schemaVersion: STORED_STATE_SCHEMA_VERSION, + documents: Array.from(this.byDocId.values()), + lastSeenUpdateId: this.lastSeenUpdateId + }); + } + + public allSettledDocuments(): Map { + const result = new Map(); + for (const record of this.byDocId.values()) { + if (record.localPath !== undefined) { + result.set(record.localPath, record); + } + } + return result; + } + + /** + * Every tracked record, regardless of whether it has been placed on + * disk yet. The Reconciler uses this to find records whose + * `localPath === undefined` (e.g. a remote create that landed when + * its target slot was occupied) and try to place them once the + * obstruction clears. `allSettledDocuments` filters those out, so + * relying on it would render placement-pending records invisible + * forever. 
+ */ + public allRecords(): Iterable { + return this.byDocId.values(); + } + + public hasPendingEventsForPath(path: RelativePath): boolean { + const record = this._byLocalPath.get(path); + if (record === undefined) { + return true; // if we don't know about this path, it must be pending creation + } + const docId = record.documentId; + return this.events.some( + (e) => + (e.type === SyncEventType.LocalCreate && e.path === path) || + (e.type === SyncEventType.LocalUpdate && + e.documentId === docId) || + (e.type === SyncEventType.LocalDelete && + e.documentId === docId) || + (e.type === SyncEventType.RemoteChange && + // we care about the local path not the remote + this.getDocumentByDocumentId(e.remoteVersion.documentId) + ?.localPath === path) + ); + } + + public hasPendingLocalEventsForDocumentId(documentId: DocumentId): boolean { + return this.events.some( + (e) => + (e.type === SyncEventType.LocalUpdate && + e.documentId === documentId) || + (e.type === SyncEventType.LocalDelete && + e.documentId === documentId) + ); + } + + public hasPendingLocalDeleteForDocumentId( + documentId: DocumentId, + path?: RelativePath + ): boolean { + return this.events.some( + (e) => + e.type === SyncEventType.LocalDelete && + e.documentId === documentId && + (path === undefined || e.path === path) + ); + } + + public async clearAllState(): Promise { + this.clearPending(); + this.byDocId.clear(); + this._byLocalPath.clear(); + this._pendingServerDeletes.clear(); + this._lastSeenUpdateId.reset(); + await this.save(); + } + + public clearPending(): void { + const hadEvents = this.events.length > 0; + this.rejectAllPendingCreates(); + this.events.length = 0; + if (hadEvents) { + this.notifyPendingUpdateCountChanged(); + } + } + + public findLatestCreateForPath( + path: RelativePath + ): Extract | undefined { + for (let i = this.events.length - 1; i >= 0; i--) { + const e = this.events[i]; + if (e.type === SyncEventType.LocalCreate && e.path === path) { + return e; + } + } + return 
undefined; + } + + public hasPendingCreateForPath(path: RelativePath): boolean { + return this.events.some( + (e) => e.type === SyncEventType.LocalCreate && e.path === path + ); + } + + public updatePendingCreatePath( + oldPath: RelativePath, + newPath: RelativePath + ): void { + const createEvent = this.findLatestCreateForPath(oldPath); + if (createEvent === undefined) { + return; + } + + const { promise } = createEvent.resolvers; + createEvent.path = newPath; + if (!createEvent.isProcessing) { + this.moveBlockingDeletesBeforeCreate(createEvent, newPath); + this.moveBlockingRenamesBeforeCreate(createEvent, newPath); + } + + for (const e of this.events) { + if ( + e.type === SyncEventType.LocalUpdate && + e.documentId === promise + ) { + e.path = newPath; + } + } + } + + private moveBlockingDeletesBeforeCreate( + createEvent: Extract, + path: RelativePath + ): void { + const { promise } = createEvent.resolvers; + let createIndex = this.events.indexOf(createEvent); + if (createIndex < 0) { + return; + } + + for (let i = createIndex + 1; i < this.events.length; ) { + const event = this.events[i]; + if ( + event.type === SyncEventType.LocalDelete && + event.path === path && + event.documentId !== promise + ) { + this.events.splice(i, 1); + this.events.splice(createIndex, 0, event); + createIndex++; + continue; + } + i++; + } + } + + /** + * The `path` argument is the create's just-retargeted target. Any + * other tracked doc whose server-side path is still `path` (its + * watcher-driven local rename hasn't reached the server yet) needs + * its pending LocalUpdate to drain *before* this create — otherwise + * the create's HTTP request hits the server while the doc is still + * at `path` and triggers a same-path same-docId merge that + * silently consumes the user's "new doc" intent into the + * already-tracked doc. The pending LocalUpdate is the rename that + * moves the existing doc off `path` server-side; running it first + * frees the slot. 
Skipped when the create has already been sent — + * at that point the merge has already happened or hasn't, and + * reordering the queue can't unwind it. + */ + private moveBlockingRenamesBeforeCreate( + createEvent: Extract, + path: RelativePath + ): void { + const blockingDocIds = new Set(); + for (const record of this.byDocId.values()) { + if ( + record.remoteRelativePath === path && + record.localPath !== path + ) { + blockingDocIds.add(record.documentId); + } + } + if (blockingDocIds.size === 0) { + return; + } + + let createIndex = this.events.indexOf(createEvent); + if (createIndex < 0) { + return; + } + + for (let i = createIndex + 1; i < this.events.length; ) { + const event = this.events[i]; + if ( + event.type === SyncEventType.LocalUpdate && + typeof event.documentId === "string" && + blockingDocIds.has(event.documentId) + ) { + this.events.splice(i, 1); + this.events.splice(createIndex, 0, event); + createIndex++; + continue; + } + i++; + } + } + + /** + * Synchronous half of `setLocalPath`: mutate `record.localPath` and + * re-key `_byLocalPath` without persisting. Used by `enqueue`'s + * rename branch where the synchronicity contract requires we push + * the LocalUpdate event before awaiting the save. + * + * Enforces the invariant + * `record.localPath !== undefined ⇒ byLocalPath.get(record.localPath) === record`. + * If `newLocalPath` is currently held by a different record, that + * record is *displaced*: its `localPath` is cleared so it enters + * placement-pending state, and the Reconciler's next pass will + * re-place it via `tryInitialPlacement`. Without this displacement + * the prior holder would remain shadowed (its `localPath === P` + * but `byLocalPath[P]` points elsewhere) and the Reconciler could + * later try to "rescue" the shadowed record by reading/renaming + * the file at `P` — which belongs to the new owner now — causing + * data loss. 
This is the architectural fix for bug D + * (`Files from agent-1 missing in agent-0` after a same-path + * create cycle). + */ + private mutateLocalPathInPlace( + record: DocumentRecord, + newLocalPath: RelativePath | undefined + ): void { + const previousLocalPath = record.localPath; + if ( + previousLocalPath !== undefined && + this._byLocalPath.get(previousLocalPath) === record + ) { + this._byLocalPath.delete(previousLocalPath); + } + record.localPath = newLocalPath; + let displacedRecord: DocumentRecord | undefined; + let displacedOldPath: RelativePath | undefined; + if (newLocalPath !== undefined) { + const displaced = this._byLocalPath.get(newLocalPath); + if (displaced !== undefined && displaced !== record) { + // Invariant: `byLocalPath[displaced.localPath] === displaced`. + // We're about to overwrite that slot, so clear the + // displaced record's localPath; the reconciler will + // re-place it via tryInitialPlacement on the next pass. + displacedOldPath = displaced.localPath; + displaced.localPath = undefined; + displacedRecord = displaced; + } + this._byLocalPath.set(newLocalPath, record); + } + if (previousLocalPath !== newLocalPath) { + this.onDocumentPathChanged.trigger( + record.documentId, + previousLocalPath, + newLocalPath + ); + } + if (displacedRecord !== undefined) { + this.onDocumentPathChanged.trigger( + displacedRecord.documentId, + displacedOldPath, + undefined + ); + } + } + + private notifyPendingUpdateCountChanged(): void { + this.onPendingUpdateCountChanged.trigger(this.events.length); + } + + private rejectAllPendingCreates(): void { + for (const event of this.events) { + if (event.type === SyncEventType.LocalCreate) { + event.resolvers.promise.catch(() => { + /* suppressed — consumer may not be listening */ + }); + event.resolvers.reject(new Error("Create was cancelled")); + } + } + } + + private cancelPendingCreate( + createEvent: Extract + ): void { + const { promise } = createEvent.resolvers; + const toRemove = 
this.events.filter( + (event) => + event === createEvent || + ((event.type === SyncEventType.LocalUpdate || + event.type === SyncEventType.LocalDelete) && + event.documentId === promise) + ); + + for (const event of toRemove) { + removeFromArray(this.events, event); + } + + createEvent.resolvers.promise.catch(() => { + /* suppressed — the create/delete pair collapsed locally */ + }); + createEvent.resolvers.reject(new Error("Create was cancelled")); + + if (toRemove.length > 0) { + this.notifyPendingUpdateCountChanged(); + } + } + + private purgeRemoteChangesForDocumentId(documentId: DocumentId): void { + const toRemove = this.events.filter( + (e) => + e.type === SyncEventType.RemoteChange && + e.remoteVersion.documentId === documentId + ); + for (const event of toRemove) { + if (event.type === SyncEventType.RemoteChange) { + // Advance the watermark for the dropped event so the gap + // doesn't make the catch-up replay this id forever. + this._lastSeenUpdateId.add(event.remoteVersion.vaultUpdateId); + } + removeFromArray(this.events, event); + } + if (toRemove.length > 0) { + this.notifyPendingUpdateCountChanged(); + } + } +} diff --git a/frontend/sync-client/src/sync-operations/syncer.ts b/frontend/sync-client/src/sync-operations/syncer.ts index 71dedd85..4e908600 100644 --- a/frontend/sync-client/src/sync-operations/syncer.ts +++ b/frontend/sync-client/src/sync-operations/syncer.ts @@ -1,238 +1,191 @@ -import type { - Database, - DocumentId, - DocumentRecord, - RelativePath -} from "../persistence/database"; -import type { SyncService } from "../services/sync-service"; +// Two-loop sync engine. The wire loop (this file) keeps records in step +// with the server: HTTP/WS handlers update record fields and write +// content to the file at `record.localPath`. They never move files for +// path placement. The Reconciler (reconciler.ts) handles record↔disk +// path reconciliation, running after every wire-loop drained event. 
+import { + SyncEventType, + type DocumentId, + type DocumentRecord, + type SyncEvent, + type RelativePath, + type VaultUpdateId +} from "./types"; import type { Logger } from "../tracing/logger"; -import PQueue from "p-queue"; import { hash } from "../utils/hash"; -import { v4 as uuidv4 } from "uuid"; import type { Settings } from "../persistence/settings"; import type { FileOperations } from "../file-operations/file-operations"; -import { findMatchingFile } from "../utils/find-matching-file"; -import type { UnrestrictedSyncer } from "./unrestricted-syncer"; -import { createPromise } from "../utils/create-promise"; -import { SyncResetError } from "../services/sync-reset-error"; -import { Locks } from "../utils/data-structures/locks"; +import { FileAlreadyExistsError } from "../errors/file-already-exists-error"; +import { scheduleOfflineChanges } from "./offline-change-detector"; +import { SyncResetError } from "../errors/sync-reset-error"; import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; import type { WebSocketVaultUpdate } from "../services/types/WebSocketVaultUpdate"; import type { WebSocketManager } from "../services/websocket-manager"; import type { WebSocketClientMessage } from "../services/types/WebSocketClientMessage"; -import { awaitAll } from "../utils/await-all"; import { EventListeners } from "../utils/data-structures/event-listeners"; +import type { SyncEventQueue } from "./sync-event-queue"; +import type { SyncService } from "../services/sync-service"; +import { FileNotFoundError } from "../errors/file-not-found-error"; +import { HttpClientError } from "../errors/http-client-error"; +import type { SyncHistory } from "../tracing/sync-history"; +import { + SyncStatus, + SyncType, + type HistoryEntry +} from "../tracing/sync-history"; +import { isBinary } from "../utils/is-binary"; +import { isFileTypeMergable } from "../utils/is-file-type-mergable"; +import { diff } from "reconcile-text"; +import type 
{ ServerConfig } from "../services/server-config"; +import type { FixedSizeDocumentCache } from "../utils/data-structures/fix-sized-cache"; +import { base64ToBytes } from "byte-base64"; +import type { DocumentUpdateResponse } from "../services/types/DocumentUpdateResponse"; +import { Reconciler } from "./reconciler"; + +// Internal ignore pattern pinned on the queue at construction time so +// the watcher's enqueue path doesn't pick up Reconciler swap markers. +const VAULTLINK_INTERNAL_DIR_IGNORE = ".vaultlink/**"; export class Syncer { public readonly onRemainingOperationsCountChanged = new EventListeners< (remainingOperations: number) => unknown >(); - private readonly remoteDocumentsLock: Locks; + private readonly queue: SyncEventQueue; + private readonly reconciler: Reconciler; + // Bytes the wire loop received for a doc whose `localPath` is not yet + // set (e.g. a remote create whose target slot was occupied). Shared + // with the Reconciler, which consumes (and deletes the entry) when it + // places the file. Keeping the bytes here avoids a redundant + // server fetch on the very next reconciler pass. 
+ private readonly pendingPlacementContent = new Map< + DocumentId, + Uint8Array + >(); - // FIFO to limit the number of concurrent sync operations - private readonly syncQueue: PQueue; - - private _isFirstSyncComplete = false; private runningScheduleSyncForOfflineChanges: Promise | undefined; + private drainPromise: Promise | undefined; + private drainRequestedWhileRunning = false; + private isDrainingPaused = false; + private isScanning = false; private previousRemainingOperationsCount = 0; public constructor( private readonly deviceId: string, private readonly logger: Logger, - private readonly database: Database, private readonly settings: Settings, - private readonly syncService: SyncService, private readonly webSocketManager: WebSocketManager, private readonly operations: FileOperations, - private readonly internalSyncer: UnrestrictedSyncer + private readonly syncService: SyncService, + private readonly history: SyncHistory, + private readonly contentCache: FixedSizeDocumentCache, + private readonly serverConfig: ServerConfig, + queue: SyncEventQueue ) { - this.syncQueue = new PQueue({ - concurrency: settings.getSettings().syncConcurrency - }); + this.queue = queue; - this.remoteDocumentsLock = new Locks(this.logger); + // Hide the Reconciler's swap-marker scratch directory from the + // watcher's enqueue path. Without this, the marker file the + // Reconciler writes during a cycle swap would race onto the + // queue as a LocalCreate, and the queue would push that to the + // server. 
+ this.queue.addInternalIgnorePattern(VAULTLINK_INTERNAL_DIR_IGNORE); - settings.onSettingsChanged.add((newSettings, oldSettings) => { - if (newSettings.syncConcurrency !== oldSettings.syncConcurrency) { - this.syncQueue.concurrency = newSettings.syncConcurrency; - } - }); + this.reconciler = new Reconciler( + this.logger, + this.operations, + this.syncService, + this.queue, + this.pendingPlacementContent + ); - this.syncQueue.on("active", () => { - if (this.previousRemainingOperationsCount !== this.syncQueue.size) { - this.previousRemainingOperationsCount = this.syncQueue.size; - this.onRemainingOperationsCountChanged.trigger( - this.syncQueue.size - ); - } - }); + // Fire-and-forget: any swap marker left behind by a crash gets + // rolled forward before the first wire-loop event runs. Errors + // are logged inside the reconciler. + void this.reconciler.recoverFromInterruptedSwap(); this.webSocketManager.onWebSocketStatusChanged.add((isConnected) => { if (isConnected) { - // The JS WebSocket API doesn't support setting headers, so we have to send the token as a message this.sendHandshakeMessage(); } }); this.webSocketManager.onRemoteVaultUpdateReceived.add( this.syncRemotelyUpdatedFile.bind(this) ); + // Funnel every queue mutation (enqueue, consume, clearPending) through + // the public count notifier so listeners see grow/shrink transitions + // immediately rather than only when a drain consumes an event. + this.queue.onPendingUpdateCountChanged.add(() => { + this.notifyRemainingOperationsChanged(); + }); } - public get isFirstSyncComplete(): boolean { - return this._isFirstSyncComplete; + /** + * True while the syncer has *active* work the caller should wait on: a + * running offline scan or an in-flight drain. Pending queue events alone + * don't count — `pause()` and `SyncResetError` exit drain early without + * clearing the queue, and nothing will pick those events back up until + * sync is re-enabled. 
Treating queued-but-stuck events as pending work + * would deadlock `waitUntilFinishedInternal` (the awaits inside its loop + * are no-ops once the active work has settled). + * + * The contract that makes "in-flight only" sufficient: every codepath + * that enqueues an event ends in `ensureDraining()` (the local-sync + * methods, `syncRemotelyUpdatedFile`, and the tail of + * `internalScheduleSyncForOfflineChanges`). So if a WebSocket handler + * lands new work mid-await, the next loop iteration sees `drainPromise` + * set and waits on it. + * + * Uses `isScanning` rather than `runningScheduleSyncForOfflineChanges` + * because the latter is a "have we already scanned this session" latch + * that stays set after the scan resolves. + */ + public get hasPendingWork(): boolean { + return this.isScanning || this.drainPromise !== undefined; } - public async syncLocallyCreatedFile( - relativePath: RelativePath - ): Promise { - if ( - this.database.getLatestDocumentByRelativePath(relativePath) - ?.isDeleted === false - ) { - this.logger.debug( - `Document ${relativePath} already exists in the database, skipping` - ); - return; - } - - const [promise, resolve, reject] = createPromise(); - - const id = uuidv4(); - const document = this.database.createNewPendingDocument( - id, - relativePath, - promise - ); - - try { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncLocallyCreatedFile(document) - ); - - resolve(); - } catch (e) { - reject(e); - } finally { - this.database.removeDocumentPromise(promise); - } + public syncLocallyCreatedFile(relativePath: RelativePath): void { + void this.queue.enqueue({ + type: SyncEventType.LocalCreate, + path: relativePath + }); + this.ensureDraining(); } - public async syncLocallyDeletedFile( - relativePath: RelativePath - ): Promise { - if ( - this.database.getLatestDocumentByRelativePath(relativePath) - ?.isDeleted === true - ) { - // This is must be a consequence of us deleting a file because of a remote update 
- // which triggered a local delete, so we don't need to do anything here. - this.logger.debug( - `Document ${relativePath} has already been markes as deleted, skipping` - ); - return; - } - - // We have to have a record of the delete in case there's an in-flight update for the same - // document which finishes after the delete has succeeded and would introduce a phantom metadata record. - this.database.delete(relativePath); - - const [promise, resolve, reject] = createPromise(); - - const document = await this.database.getResolvedDocumentByRelativePath( - relativePath, - promise - ); - - try { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncLocallyDeletedFile(document) - ); - - resolve(); - - this.database.removeDocument(document); - } catch (e) { - reject(e); - } finally { - this.database.removeDocumentPromise(promise); - } - } - - public async syncLocallyUpdatedFile({ + public syncLocallyUpdatedFile({ oldPath, relativePath }: { oldPath?: RelativePath; relativePath: RelativePath; - }): Promise { - if (oldPath !== undefined) { - // We might have moved the document in the database before calling this method, - // in that case, we mustn't move it again. 
- if ( - this.database.getLatestDocumentByRelativePath(relativePath) === - undefined || - this.database.getLatestDocumentByRelativePath(relativePath) - ?.isDeleted === true - ) { - if (oldPath === relativePath) { - throw new Error( - `Old path and new path are the same: ${oldPath}` - ); - } + }): void { + void this.queue.enqueue({ + type: SyncEventType.LocalUpdate, + path: relativePath, + oldPath + }); + this.ensureDraining(); + } - this.database.move(oldPath, relativePath); - } - } + public syncLocallyDeletedFile(relativePath: RelativePath): void { + void this.queue.enqueue({ + type: SyncEventType.LocalDelete, + path: relativePath + }); + this.ensureDraining(); + } - let document = - this.database.getLatestDocumentByRelativePath(relativePath); + public async syncRemotelyUpdatedFile( + message: WebSocketVaultUpdate + ): Promise { + await this.scheduleSyncForOfflineChanges(); - if ( - oldPath !== undefined && - document?.metadata?.remoteRelativePath === relativePath - ) { - this.logger.debug( - `Document ${relativePath} has been moved as a result of a remote update, skipping sync` - ); - return; - } + void this.queue.enqueue({ + type: SyncEventType.RemoteChange, + remoteVersion: message.document + }); - if (document === undefined) { - this.logger.debug( - `Cannot find document ${relativePath} in the database, skipping` - ); - return; - } - - if (document.isDeleted) { - this.logger.debug( - `Document ${relativePath} has been deleted locally, skipping` - ); - return; - } - - const [promise, resolve, reject] = createPromise(); - - document = await this.database.getResolvedDocumentByRelativePath( - relativePath, - promise - ); - - try { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncLocallyUpdatedFile({ - oldPath, - document - }) - ); - - resolve(); - } catch (e) { - reject(e); - } finally { - this.database.removeDocumentPromise(promise); - } + this.ensureDraining(); } public async scheduleSyncForOfflineChanges(): Promise { @@ -245,7 +198,7 
@@ export class Syncer { this.runningScheduleSyncForOfflineChanges = this.internalScheduleSyncForOfflineChanges(); await this.runningScheduleSyncForOfflineChanges; - this.logger.info(`All local changes have been applied remotely`); + this.logger.info(`All local changes have been queued`); } catch (e) { if (e instanceof SyncResetError) { this.logger.info( @@ -257,47 +210,49 @@ export class Syncer { `Not all local changes have been applied remotely: ${e}` ); throw e; - } finally { - this.runningScheduleSyncForOfflineChanges = undefined; } } public async waitUntilFinished(): Promise { await this.runningScheduleSyncForOfflineChanges; - await this.syncQueue.onIdle(); // Wait for queue to be empty and running tasks to finish - } - - public async syncRemotelyUpdatedFile( - message: WebSocketVaultUpdate - ): Promise { - try { - const handlerPromise = awaitAll( - message.documents.map(async (document) => - this.internalSyncRemotelyUpdatedFile(document) - ) - ); - - await handlerPromise; - - if (message.isInitialSync && message.documents.length > 0) { - this.database.setLastSeenUpdateId( - message.documents - .map((document) => document.vaultUpdateId) - .reduce((a, b) => Math.max(a, b)) - ); - } - - this._isFirstSyncComplete = true; - } catch (e) { - this.logger.error(`Failed to sync remotely updated file: ${e}`); + // A drain that finishes can be immediately followed by a new one + // (e.g. a remote event arriving), so re-check after each await. 
+ while (this.drainPromise !== undefined) { + await this.drainPromise; } } public reset(): void { - this._isFirstSyncComplete = false; - this.syncQueue.clear(); - this.remoteDocumentsLock.reset(); - this.runningScheduleSyncForOfflineChanges = undefined; + this.queue.clearPending(); + this.clearOfflineScanGate(); + this.previousRemainingOperationsCount = 0; + } + + /** + * Reset the "have we already scanned this session" gate so a later + * `scheduleSyncForOfflineChanges()` actually performs a fresh scan + * instead of returning the previous (resolved) promise. Called when + * sync is paused so the next start picks up any offline edits made + * while sync was off. + */ + public clearOfflineScanGate(): void { + const current = this.runningScheduleSyncForOfflineChanges; + if (current !== undefined) { + void current.finally(() => { + if (this.runningScheduleSyncForOfflineChanges === current) { + this.runningScheduleSyncForOfflineChanges = undefined; + } + }); + } + } + + public pauseDraining(): void { + this.isDrainingPaused = true; + } + + public resumeDraining(): void { + this.isDrainingPaused = false; + this.ensureDraining(); } private sendHandshakeMessage(): void { @@ -305,218 +260,994 @@ export class Syncer { type: "handshake", deviceId: this.deviceId, token: this.settings.getSettings().token, - lastSeenVaultUpdateId: this.database.getLastSeenUpdateId() + lastSeenVaultUpdateId: this.queue.lastSeenUpdateId }; this.webSocketManager.sendHandshakeMessage(message); } - private async internalSyncRemotelyUpdatedFile( - remoteVersion: DocumentVersionWithoutContent - ): Promise { - let document = this.database.getDocumentByDocumentId( - remoteVersion.documentId - ); + private async internalScheduleSyncForOfflineChanges(): Promise { + this.isScanning = true; + try { + this.queue.clearPending(); // can't have conflicts between the offline scan and ongoing operations created during the preceding pause - if (document === undefined) { - // Let's avoid the same documents 
getting created in parallel multiple times. - // There might be multiple tasks waiting for the lock - return this.remoteDocumentsLock.withLock( - remoteVersion.documentId, - async () => { - document = this.database.getDocumentByDocumentId( - remoteVersion.documentId - ); - - // We're either the first one to get the lock, so we have to create the document in `unrestrictedSyncRemotelyUpdatedFile` - if (document === undefined) { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncRemotelyUpdatedFile( - remoteVersion - ) - ); - } else { - const [promise, resolve, reject] = createPromise(); - - document = - await this.database.getResolvedDocumentByRelativePath( - document.relativePath, - promise - ); - - try { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncRemotelyUpdatedFile( - remoteVersion, - document - ) - ); - - resolve(); - } catch (e) { - reject(e); - } finally { - this.database.removeDocumentPromise(promise); - } - } - - this.database.addSeenUpdateId(remoteVersion.vaultUpdateId); + await scheduleOfflineChanges( + this.logger, + this.operations, + this.queue, + (path) => { + this.syncLocallyCreatedFile(path); + }, + (args) => { + this.syncLocallyUpdatedFile(args); + }, + (path) => { + this.syncLocallyDeletedFile(path); } ); - } - - // We're either the first one to get the lock, so we have to create the document in `unrestrictedSyncRemotelyUpdatedFile` - const [promise, resolve, reject] = createPromise(); - - document = await this.database.getResolvedDocumentByRelativePath( - document.relativePath, - promise - ); - - try { - await this.syncQueue.add(async () => - this.internalSyncer.unrestrictedSyncRemotelyUpdatedFile( - remoteVersion, - document - ) - ); - - resolve(); - } catch (e) { - reject(e); } finally { - this.database.removeDocumentPromise(promise); + this.isScanning = false; } - this.database.addSeenUpdateId(remoteVersion.vaultUpdateId); + this.ensureDraining(); } - private async 
internalScheduleSyncForOfflineChanges(): Promise { - await this.createFakeDocumentsFromRemoteState(); - - const allLocalFiles = await this.operations.listFilesRecursively(); - this.logger.info( - `Scheduling sync for ${allLocalFiles.length} local files` - ); - - let locallyPossiblyDeletedFiles: DocumentRecord[] = []; - - for (const document of this.database.resolvedDocuments) { - if ( - !document.isDeleted && - !(await this.operations.exists(document.relativePath)) - ) { - locallyPossiblyDeletedFiles.push(document); - } + private ensureDraining(): void { + if (this.drainPromise !== undefined) { + this.drainRequestedWhileRunning = true; + return; } + if (this.isScanning) { + return; + } + if (this.isDrainingPaused) { + return; + } + this.drainPromise = this.drain().finally(() => { + this.drainPromise = undefined; + const shouldRestart = + this.drainRequestedWhileRunning && + this.queue.pendingUpdateCount > 0 && + !this.isScanning && + !this.isDrainingPaused && + this.settings.getSettings().isSyncEnabled; + this.drainRequestedWhileRunning = false; + if (shouldRestart) { + this.ensureDraining(); + } + }); + } - await awaitAll( - allLocalFiles.map(async (relativePath) => { - if ( - this.database.getLatestDocumentByRelativePath(relativePath) - ?.metadata !== undefined - ) { - this.logger.debug( - `Document ${relativePath} might have been updated locally, scheduling sync to validate and update it` - ); + private async drain(): Promise { + // Peek then remove-after-processing (instead of shift-then-process): + // the event must remain reachable through `findLatestCreateForPath` + // while it is in flight, so a rename event arriving mid-process can + // call `updatePendingCreatePath` to retarget this create's local path. 
+ for (;;) { + if ( + this.isDrainingPaused || + !this.settings.getSettings().isSyncEnabled + ) { + this.logger.debug( + "Drain pausing because sync is disabled; events stay queued" + ); + return; + } + const event = this.queue.peekFront(); - return this.syncLocallyUpdatedFile({ - relativePath - }); - } + if (event === undefined) { + break; + } - // Perhaps the file has been moved; let's check by looking at the deleted files - const contentHash = await this.syncQueue.add(async () => { - const contentBytes = - await this.operations.read(relativePath); // this can throw FileNotFoundError - return hash(contentBytes); - }); - - if (contentHash == undefined) { - // The file was deleted before we had a chance to read it, no need to sync it here + try { + await this.processEvent(event); + } catch (e) { + if (e instanceof SyncResetError) { + this.logger.info("Drain interrupted by sync reset"); return; } - - const originalFile = findMatchingFile( - contentHash, - locallyPossiblyDeletedFiles + this.logger.error( + `Failed to process sync event ${event.type}: ${e}` ); - if (originalFile !== undefined) { - // `originalFile` hasn't been deleted but it got moved instead - /* eslint-disable no-restricted-syntax -- Comparing by property, not direct equality */ - locallyPossiblyDeletedFiles = - locallyPossiblyDeletedFiles.filter( - (item) => - item.relativePath !== originalFile.relativePath - ); - /* eslint-enable no-restricted-syntax */ - - this.logger.debug( - `Document '${originalFile.relativePath}' was not found under its current path in the database but was found under a different path (${relativePath}), scheduling sync to move it` - ); - - // We're outside of the pqueue, so we need to call the public wrapper - return this.syncLocallyUpdatedFile({ - oldPath: originalFile.relativePath, - relativePath - }); - } - - this.logger.debug( - `Document ${relativePath} not found in database, scheduling sync to create it` - ); - // We're outside of the pqueue, so we need to call the 
public wrapper - return this.syncLocallyCreatedFile(relativePath); - }) - ); - - // this has to happen strictly after the previous awaitAll, as that one - // might have removed some of the documents from the list - await awaitAll( - locallyPossiblyDeletedFiles.map(async ({ relativePath }) => { - this.logger.debug( - `Document ${relativePath} has been deleted locally, scheduling sync to delete it` - ); - - // We're outside of the pqueue, so we need to call the public wrapper - return this.syncLocallyDeletedFile(relativePath); - }) - ); + } + this.queue.consumeEvent(event); + // Reconciler runs after every wire-loop step; any record whose + // localPath drifted from remoteRelativePath gets a chance to + // converge before the next event. Best-effort — per-record + // failures are logged and retried on the next pass. + await this.reconciler.run(); + this.notifyRemainingOperationsChanged(); + } } - /** - * Create fake documents in the database for all files that are present locally - * and also exist remotely. This will stop the subequent syncs from duplicating - * the documents by creating the same documents from multiple clients. 
- */ - private async createFakeDocumentsFromRemoteState(): Promise { - if (this.database.getHasInitialSyncCompleted()) { + private async processEvent(event: SyncEvent): Promise { + try { + if (event.type === SyncEventType.LocalCreate) { + event.isProcessing = true; + } + + if (await this.skipIfOversized(event)) { + return; + } + + switch (event.type) { + case SyncEventType.LocalCreate: + await this.processCreate(event); + break; + case SyncEventType.LocalDelete: + await this.processDelete(event); + break; + case SyncEventType.LocalUpdate: + await this.processLocalUpdate(event); + break; + case SyncEventType.RemoteChange: + await this.processRemoteChange(event); + break; + } + } catch (e) { + // If a LocalCreate fails terminally, queued LocalDelete / + // LocalUpdate events whose `documentId` is this Create's + // `resolvers.promise` would `await` it forever — reject the + // resolver so they fail-fast with the same error class and + // hit their matching skip/log branch below. + // + // Only do this for terminal errors. `SyncResetError` is + // transient: drain returns without consuming the event, so + // the next drain retries the same Create. Rejecting the + // resolver now would permanently poison it, and the eventual + // `resolveCreate(...resolve)` after the retry succeeds is a + // no-op on an already-settled promise — leaving every + // dependent event stuck failing on `await event.documentId`. 
+ if ( + event.type === SyncEventType.LocalCreate && + !(e instanceof SyncResetError) + ) { + event.resolvers.promise.catch(() => { + /* suppressed */ + }); + event.resolvers.reject(e); + } + + if (e instanceof FileNotFoundError) { + this.logger.info( + `Skipping sync event '${event.type}' because the file no longer exists` + ); + return; + } + if (e instanceof HttpClientError) { + this.logger.error( + `Server rejected ${event.type} request: ${e.message}` + ); + return; + } + throw e; + } + } + + private async skipIfOversized(event: SyncEvent): Promise { + let sizeInBytes = 0; + let relativePath: RelativePath = ""; + + switch (event.type) { + case SyncEventType.LocalDelete: + return false; + case SyncEventType.LocalCreate: + case SyncEventType.LocalUpdate: + sizeInBytes = await this.operations.getFileSize(event.path); + relativePath = event.path; + break; + case SyncEventType.RemoteChange: + if (event.remoteVersion.isDeleted) { + return false; + } + sizeInBytes = event.remoteVersion.contentSize; + ({ relativePath } = event.remoteVersion); + break; + } + + const oversizedEntry = this.getHistoryEntryForSkippedOversizedFile( + sizeInBytes, + relativePath + ); + if (oversizedEntry === undefined) { + return false; + } + + this.history.addHistoryEntry(oversizedEntry); + + if (event.type === SyncEventType.LocalCreate) { + event.resolvers.promise.catch(() => { + /* suppressed */ + }); + event.resolvers.reject(new Error("Create was cancelled")); + } + + // Advance the cursor so the server doesn't replay this update on every + // reconnect — the skip is permanent for this version. 
+ if (event.type === SyncEventType.RemoteChange) { + this.queue.lastSeenUpdateId = event.remoteVersion.vaultUpdateId; + } + + return true; + } + + private getHistoryEntryForSkippedOversizedFile( + sizeInBytes: number, + relativePath: RelativePath + ): HistoryEntry | undefined { + const sizeInMB = Math.round(sizeInBytes / 1024 / 1024); + const { maxFileSizeMB } = this.settings.getSettings(); + if (sizeInMB > maxFileSizeMB) { + return { + status: SyncStatus.SKIPPED, + details: { + type: SyncType.SKIPPED as const, + relativePath + }, + message: `File size of ${sizeInMB} MB exceeds the maximum file size limit of ${maxFileSizeMB} MB`, + timestamp: new Date() + }; + } + } + + private async processCreate( + event: Extract + ): Promise { + const requestPath = event.path; + const contentBytes = await this.operations.read(requestPath); + const contentHash = await hash(contentBytes); + + // Use the path the pending create has when it reaches the wire loop. + // `updatePendingCreatePath` mutates queued creates when a not-yet-sent + // local file is renamed, so a renamed-away generation does not create + // a server document at a path that a newer local file has reused. + // + // `lastSeenUpdateIdForCreate(requestPath)` (rather than the contiguous + // `lastSeenUpdateId`) blocks the server from path-merging this POST + // into a doc we already track at the same path. Without that, a + // same-device rename race can alias two physically distinct local + // files onto one docId. See `SyncEventQueue.lastSeenUpdateIdForCreate`. + const response = await this.syncService.create({ + relativePath: requestPath, + lastSeenVaultUpdateId: + this.queue.lastSeenUpdateIdForCreate(requestPath), + contentBytes + }); + + // Same-docId collapse. While our LocalCreate sat in the queue, a + // RemoteCreate may have arrived for this same path. 
The wire-loop's + // `processRemoteCreateForNewDocument` would have built a record with + // `localPath === undefined` carrying the same docId the server is + // about to return us. `upsertRecord` keys by docId and merges in + // place, so the record we pass below collapses into that existing + // one — its claim is dropped and `localPath` becomes `event.path`. + // The reconciler will reconcile if `response.relativePath` differs. + let remoteHash = contentHash; + if (response.type === "MergingUpdate") { + const responseBytes = base64ToBytes(response.contentBase64); + // Read `event.path` live for both the write target and the + // cache key. A user rename arriving between HTTP-send and + // HTTP-response rewrites `event.path` via + // `updatePendingCreatePath`; the merge write must land on + // the current slot so the queued LocalUpdate that follows + // sees the merged bytes. + await this.operations.write( + event.path, + contentBytes, + responseBytes + ); + remoteHash = await hash(responseBytes); + await this.updateCache( + response.vaultUpdateId, + responseBytes, + event.path + ); + } else { + await this.updateCache( + response.vaultUpdateId, + contentBytes, + event.path + ); + } + + // Drop any stashed bytes for this docId — the file is on disk at + // event.path, so the reconciler shouldn't try to fetch & write + // its content. (The reconciler's job for this record is now just + // path placement, if needed.) + this.pendingPlacementContent.delete(response.documentId); + + // Snapshot `event.path` only after the write has settled. The + // write itself can drive synchronous watcher callbacks (e.g. + // an atomic-update fileSystemOperations that fires a "file + // changed" event back into the queue), and the test harness's + // user-facing renames also race here. 
Either path mutates + // `event.path` via `updatePendingCreatePath`; reading it once + // up front would lock in a stale slot and leave + // `record.localPath` pointing at a vacated path with no + // LocalRename ever materializing. + const localPath = event.path; + + await this.queue.resolveCreate(event, { + documentId: response.documentId, + parentVersionId: response.vaultUpdateId, + remoteRelativePath: response.relativePath, + remoteHash, + localPath + }); + + this.queue.lastSeenUpdateId = response.vaultUpdateId; + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { type: SyncType.CREATE, relativePath: localPath }, + message: + response.type === "MergingUpdate" + ? "Created file and merged with existing remote version" + : "Successfully created file on the server", + author: response.userId, + timestamp: new Date(response.updatedDate) + }); + } + + private async processDelete( + event: Extract + ): Promise { + const documentId = await event.documentId; + const record = this.queue.getDocumentByDocumentId(documentId); + if ( + record?.localPath !== undefined && + record.localPath !== event.path + ) { + this.logger.debug( + `Skipping local-delete for ${documentId} at ${event.path}: ` + + `record now owns ${record.localPath}` + ); return; } - const [allLocalFiles, remote] = await awaitAll([ - this.operations.listFilesRecursively(), - this.syncQueue.add(async () => this.syncService.getAll()) - ]); + // The disk file is already gone when a LocalDelete reaches the wire + // loop. This is redundant for settled records deleted through + // `enqueue`, but load-bearing for creates that were deleted while the + // create request was still pending: their record only exists after the + // create ack resolves. 
+ await this.queue.setLocalPath(documentId, undefined); - if (remote !== undefined) { - remote.latestDocuments - .filter( - (remoteDocument) => - allLocalFiles.includes(remoteDocument.relativePath) && - !remoteDocument.isDeleted && - this.database.getDocumentByDocumentId( - remoteDocument.documentId - ) === undefined - ) - .forEach((remoteDocument) => { - this.database.createNewEmptyDocument( - remoteDocument.documentId, - remoteDocument.vaultUpdateId, - remoteDocument.relativePath - ); - }); + const response = await this.syncService.delete({ + documentId + }); + + // Don't remove the doc from the queue or advance lastSeenUpdateId + // here. The server broadcasts the delete back to us over the + // WebSocket; that receipt drives `processRemoteDelete`'s cleanup + // and history entry. Keeping the entry in the map until then lets + // late remote updates be recognised as "file is missing" and + // skipped, instead of resurrecting the doc. + // + // Mark the doc as deletion-pending so the Reconciler doesn't + // resurrect it during the gap between HTTP-ack and WS-receipt. + // Without this, the LocalDelete enqueue's `setLocalPath(undefined)` + // leaves the record looking like a "needs initial placement" case + // to the Reconciler — which would then fetch the pre-delete bytes + // from the server and write them to disk. The mark also blocks + // any late RemoteChange from stashing pre-delete bytes into + // `pendingPlacementContent` (see processRemoteUpdate). The mark is + // cleared automatically by `removeDocumentById`. We also drop any + // already-stashed content for this doc since it cannot be placed. 
+ this.queue.markServerDeletePending(documentId); + this.pendingPlacementContent.delete(documentId); + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { + type: SyncType.DELETE, + relativePath: event.path + }, + message: "Successfully deleted file on the server", + author: response.userId, + timestamp: new Date(response.updatedDate) + }); + } + + private async processLocalUpdate( + event: Extract + ): Promise { + const documentId = await event.documentId; + + const record = this.queue.getDocumentByDocumentId(documentId); + if (record === undefined) { + // The doc was deleted between this event being queued and + // drained — skip silently. Common when a LocalDelete drains + // ahead of a LocalUpdate that was already in the queue. + this.logger.debug( + `Skipping local-update for ${documentId} — doc no longer tracked (deleted)` + ); + return; + } + // The record may exist with no local file (e.g. a pending-delete + // raced ahead and nulled out localPath). Nothing to upload from. + if (record.localPath === undefined) { + this.logger.debug( + `Skipping local-update for ${documentId} — record has no local file` + ); + return; + } + const contentBytes = await this.operations.read(record.localPath); + const contentHash = await hash(contentBytes); + + // For a user-driven rename the user's intent is `event.originalPath` + // — that's the rename target. For a content-only edit the user is + // agnostic to the path; sending one would be wrong if a remote + // rename processed first, because the server would interpret the + // user's (now-stale) path as a rename back. So content-only PUTs + // omit the path and the server keeps the doc at its current + // server-known location. + const renameTarget = event.isUserRename + ? 
event.originalPath + : undefined; + + const hashChanged = contentHash !== record.remoteHash; + const pathChanged = + renameTarget !== undefined && + record.remoteRelativePath !== renameTarget; + + if (!hashChanged && !pathChanged) { + this.logger.debug( + `File hash of ${record.localPath} matches last synced version; no need to sync` + ); + return; } - this.database.setHasInitialSyncCompleted(true); + const response = await this.sendUpdate({ + record, + relativePath: renameTarget, + contentBytes + }); + + if (response.isDeleted) { + await this.processRemoteDelete(record.localPath, { + ...response, + contentSize: 0, + isNewFile: false + }); + return; + } + + // Read `record.localPath` live via a fresh queue lookup: the + // queue's enqueue rename branch mutates the same record object + // in place across our await on `sendUpdate`, and a displaced-doc + // cleanup can null it out. The fresh lookup also re-widens the + // type back to `string | undefined` (the earlier guard narrowed + // it pre-await). The reconciler handles any further path + // placement after we write. + const livePath = + this.queue.getDocumentByDocumentId(documentId)?.localPath; + let remoteHash = contentHash; + if (response.type === "MergingUpdate") { + const responseBytes = base64ToBytes(response.contentBase64); + if (livePath !== undefined) { + await this.operations.write( + livePath, + contentBytes, + responseBytes + ); + } + remoteHash = await hash(responseBytes); + await this.updateCache( + response.vaultUpdateId, + responseBytes, + livePath ?? response.relativePath + ); + } else { + await this.updateCache( + response.vaultUpdateId, + contentBytes, + livePath ?? response.relativePath + ); + } + + await this.queue.upsertRecord({ + documentId: response.documentId, + parentVersionId: response.vaultUpdateId, + remoteRelativePath: response.relativePath, + remoteHash, + // localPath is owned by the watcher and the reconciler. 
Pass + // the value we observed pre-await purely as a hint for the + // placement-pending → placed transition; `upsertRecord` ignores + // it when an existing localPath is already set, so a watcher + // rename that landed during the HTTP roundtrip is preserved. + localPath: livePath + }); + this.queue.lastSeenUpdateId = response.vaultUpdateId; + + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { + type: SyncType.UPDATE, + relativePath: livePath ?? response.relativePath + }, + message: + response.type === "MergingUpdate" + ? "Updated file and merged with remote changes" + : "Successfully updated file on the server", + author: response.userId, + timestamp: new Date(response.updatedDate) + }); + } + + private async processRemoteChange( + event: Extract + ): Promise { + const { remoteVersion } = event; + const trackedRecord = this.queue.getDocumentByDocumentId( + remoteVersion.documentId + ); + + if (remoteVersion.isDeleted) { + if (trackedRecord === undefined) { + // The doc isn't tracked locally — either we never had + // it (joined the vault after the delete) or a previous + // delete already cleaned it up. Just advance + // `lastSeenUpdateId` so we don't replay this on the + // next reconnect. + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + return; + } + return this.processRemoteDelete( + trackedRecord.localPath, + remoteVersion + ); + } + + if ( + (trackedRecord?.parentVersionId ?? 0) >= remoteVersion.vaultUpdateId + ) { + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + this.logger.debug( + `Document ${remoteVersion.relativePath} is already up-to-date or has newer local changes; skipping remote update` + ); + return; + } + + // Server-side delete is in flight: our HTTP DELETE has been acked + // but the WebSocket receipt that would `removeDocumentById` hasn't + // arrived yet. 
Any remote update we apply here would resurrect the + // doc — either by writing the pre-delete bytes to disk + // (`processRemoteUpdate` with localPath set) or by stashing them + // for the Reconciler (`processRemoteUpdate` with localPath + // undefined; reconciler is also gated, but stashing leaves + // `pendingPlacementContent` lingering which a same-docId + // re-creation could later misuse). Advance the watermark and + // discard; the eventual delete-receipt will clean up the record. + if ( + trackedRecord !== undefined && + this.queue.hasPendingServerDelete(trackedRecord.documentId) + ) { + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + this.logger.debug( + `Discarding remote update for ${remoteVersion.documentId}: ` + + `local HTTP DELETE has been acked; awaiting WS receipt` + ); + return; + } + + if (trackedRecord !== undefined) { + // The doc is tracked, but the disk slot can be stale. One + // concrete race: a remote create quick-writes a file, a + // watcher rename/delete lands before the record is fully + // settled, and the record is left claiming a path that no + // longer exists. If no queued local operation owns that + // disappearance, clear the localPath and let + // processRemoteUpdate stash/place the active server version. 
+ if (trackedRecord.localPath !== undefined) { + const fileExists = await this.operations.exists( + trackedRecord.localPath + ); + if ( + !fileExists && + !this.queue.hasPendingLocalEventsForDocumentId( + remoteVersion.documentId + ) + ) { + this.logger.debug( + `Remote update for ${remoteVersion.documentId}: ` + + `local file at ${trackedRecord.localPath} is missing; ` + + `clearing localPath for placement` + ); + await this.queue.setLocalPath( + trackedRecord.documentId, + undefined + ); + } + } + return this.processRemoteUpdate(trackedRecord, remoteVersion); + } + + if (!remoteVersion.isNewFile) { + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + this.logger.debug( + `Ignoring stale RemoteChange for untracked, non-new document ${remoteVersion.documentId}` + ); + return; + } + + return this.processRemoteCreateForNewDocument(remoteVersion); + } + + private async processRemoteDelete( + localPath: RelativePath | undefined, + remoteVersion: DocumentVersionWithoutContent + ): Promise { + if (localPath !== undefined) { + // Verify the record still owns this disk slot before deleting. + // A same-path recreate (LocalCreate at this path resolving + // after we sent the server-delete for this doc) installs a + // new doc into byLocalPath but doesn't clear the old record's + // stale `localPath` field. When the WS broadcast for the old + // doc's deletion arrives, naively deleting at `localPath` + // would clobber the new doc's file. Skip the disk delete + // when the slot now belongs to a different doc; the queue + // record cleanup below still runs. 
+ const currentOwner = this.queue.byLocalPath.get(localPath); + if ( + currentOwner === undefined || + currentOwner.documentId === remoteVersion.documentId + ) { + await this.operations.delete(localPath); + } else { + this.logger.debug( + `Skipping disk delete for ${remoteVersion.documentId} at ${localPath}: ` + + `slot is now owned by ${currentOwner.documentId}` + ); + } + } + await this.queue.removeDocumentById(remoteVersion.documentId); + + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { + type: SyncType.DELETE, + relativePath: localPath ?? remoteVersion.relativePath + }, + message: + "Successfully deleted file which had been deleted remotely", + author: remoteVersion.userId, + timestamp: new Date(remoteVersion.updatedDate) + }); + } + + private async processRemoteUpdate( + record: DocumentRecord, + remoteVersion: DocumentVersionWithoutContent + ): Promise { + if ( + this.queue.hasPendingLocalEventsForDocumentId( + remoteVersion.documentId + ) + ) { + // The user has queued local edits for this doc. Apply them + // first — they'll round-trip to the server, get merged + // there, and broadcast back. If we processed this remote + // update now, `FileOperations.write` would receive + // `expected = current = the disk content (which already + // includes the user's pending edits)`, so the 3-way merge + // baseline collapses to "no local change vs base" and + // returns `theirs`, silently dropping the user's bytes. + // Re-enqueueing (rather than just deferring with a flag) + // is correct because by the time the queued local events + // drain, this remote update may be stale: our + // `parentVersionId` advances past `remoteVersion.vaultUpdateId`, + // and the next pass's standard "stale" check at the top of + // `processRemoteChange` will discard it. 
+ // + // Broader concern (out of scope here): the 3-way merge + // baseline in `FileOperations.write` is the most-recent + // disk read at every callsite, not the previous server + // version. That's correct for the post-server-merge writes + // in `processCreate` / `processLocalUpdate` (we're + // applying the server's merged result to our potentially + // newer disk state), but fundamentally wrong as a base for + // a true 3-way merge. The defer gate above sidesteps the + // only call pattern where it actually loses data today. + void this.syncRemotelyUpdatedFile({ document: remoteVersion }); + return; + } + + const remoteContent = await this.syncService.getDocumentVersionContent({ + documentId: remoteVersion.documentId, + vaultUpdateId: remoteVersion.vaultUpdateId + }); + + // `record.localPath` may be undefined — the record was created on + // a previous remote-create whose target slot was occupied at + // receive time. In that case stash the bytes for the reconciler + // to write when it places the file; we still update the wire + // fields so the catch-up doesn't replay this version. + // + // The slot may also have been shadowed: the record still claims + // `localPath = P`, but `byLocalPath[P]` now points at a different + // doc (a same-path recreate installed a new owner without + // clearing this record's stale field — same race shape as the + // processRemoteDelete fix above). Writing to a shadowed slot + // would clobber the new owner's bytes. Clear the stale claim now + // so the reconciler treats this record as placement-pending; the + // closing `upsertRecord` no longer touches an existing record's + // localPath, so the clear has to happen explicitly here. + const claimedPath = record.localPath; + const livePath = + claimedPath !== undefined && + this.queue.byLocalPath.get(claimedPath)?.documentId === + record.documentId + ? 
claimedPath + : undefined; + if (claimedPath !== undefined && livePath === undefined) { + this.logger.debug( + `Remote update for ${record.documentId} at claimed ${claimedPath} ` + + `but slot is shadowed; clearing stale claim and deferring to reconciler` + ); + await this.queue.setLocalPath(record.documentId, undefined); + } + if (livePath !== undefined) { + const currentContent = await this.operations.read(livePath); + // Re-check the entry-time gate immediately before the disk + // mutation. The `await`s on `getDocumentVersionContent` and + // `read` open a TOCTOU window during which a LocalUpdate + // for this doc could have been enqueued by the watcher. If + // we proceeded, `operations.write` would receive + // `expected = current = disk-content-already-with-user-bytes`, + // collapsing the 3-way merge baseline and silently + // overwriting the user's pending edits with `theirs`. + // Re-enqueueing the RemoteChange is the same fix shape as + // the entry-time gate above; the next pass either applies + // it or discards it as stale via the standard check at the + // top of `processRemoteChange`. + if ( + this.queue.hasPendingLocalEventsForDocumentId( + remoteVersion.documentId + ) + ) { + void this.syncRemotelyUpdatedFile({ document: remoteVersion }); + return; + } + // Re-check shadowing as well: the same TOCTOU window + // (between `getDocumentVersionContent` and `read`, plus + // `read` itself) could see a same-path recreate steal the + // slot. If we lost ownership, fall through to the + // pendingPlacementContent stash by re-entering the + // RemoteChange — the next pass observes the updated + // byLocalPath and routes correctly. 
+ if ( + this.queue.byLocalPath.get(livePath)?.documentId !== + record.documentId + ) { + void this.syncRemotelyUpdatedFile({ document: remoteVersion }); + return; + } + await this.operations.write( + livePath, + currentContent, + remoteContent + ); + await this.updateCache( + remoteVersion.vaultUpdateId, + remoteContent, + livePath + ); + } else { + this.pendingPlacementContent.set( + remoteVersion.documentId, + remoteContent + ); + await this.updateCache( + remoteVersion.vaultUpdateId, + remoteContent, + remoteVersion.relativePath + ); + } + + await this.queue.upsertRecord({ + documentId: record.documentId, + parentVersionId: remoteVersion.vaultUpdateId, + remoteRelativePath: remoteVersion.relativePath, + remoteHash: await hash(remoteContent), + localPath: livePath + }); + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { + type: SyncType.UPDATE, + relativePath: livePath ?? remoteVersion.relativePath + }, + message: "Successfully applied remote update", + author: remoteVersion.userId, + timestamp: new Date(remoteVersion.updatedDate) + }); + } + + private async processRemoteCreateForNewDocument( + remoteVersion: DocumentVersionWithoutContent + ): Promise { + // Quick-write optimization: if the target slot is free right now + // (no disk file, no tracked record), fetch and write inline. The + // catch-up replay leans on this — without it, a freshly-joined + // client would upsert every doc with `localPath = undefined` + // and rely on the reconciler to fetch each one back. + // + // If the slot is occupied, defer: leave `localPath = undefined` + // and let the reconciler place once the slot frees. Per the + // design, no buffering at receive time — the reconciler will + // fetch on demand. 
+ const target = remoteVersion.relativePath; + const slotFree = await this.canPlaceRemoteCreateAt(target); + + let localPath: RelativePath | undefined = undefined; + let remoteHash: string | undefined = undefined; + if (slotFree) { + const remoteContent = + await this.syncService.getDocumentVersionContent({ + documentId: remoteVersion.documentId, + vaultUpdateId: remoteVersion.vaultUpdateId + }); + if (!(await this.canPlaceRemoteCreateAt(target))) { + this.logger.debug( + `Quick-write for ${remoteVersion.documentId} at ${target} ` + + `became blocked while fetching content; deferring to reconciler` + ); + } else { + try { + remoteHash = await hash(remoteContent); + await this.queue.upsertRecord({ + documentId: remoteVersion.documentId, + parentVersionId: remoteVersion.vaultUpdateId, + remoteRelativePath: remoteVersion.relativePath, + remoteHash, + localPath: target + }); + const result = await this.operations.create( + target, + remoteContent + ); + const liveRecord = this.queue.getDocumentByDocumentId( + remoteVersion.documentId + ); + localPath = + liveRecord === undefined + ? result.actualPath + : liveRecord.localPath; + await this.updateCache( + remoteVersion.vaultUpdateId, + remoteContent, + localPath ?? remoteVersion.relativePath + ); + } catch (e) { + await this.queue.setLocalPath( + remoteVersion.documentId, + undefined + ); + if (!(e instanceof FileAlreadyExistsError)) { + throw e; + } + // TOCTOU: the slot was free at the pre-check but + // something landed there between then and now. Fall + // through to the no-localPath branch and let the + // reconciler retry placement once the slot frees. 
+ this.logger.debug( + `Quick-write for ${remoteVersion.documentId} at ${target} ` + + `lost a TOCTOU race; deferring to reconciler` + ); + localPath = undefined; + } + } + } + + if ( + this.queue.getDocumentByDocumentId(remoteVersion.documentId) === + undefined + ) { + await this.queue.upsertRecord({ + documentId: remoteVersion.documentId, + parentVersionId: remoteVersion.vaultUpdateId, + remoteRelativePath: remoteVersion.relativePath, + // `remoteHash` is undefined when we deferred fetching content. + // Consumers (`processLocalUpdate`'s fast-skip, + // `findMatchingFile`'s offline-rename detection) treat + // undefined as "no comparison possible" and fall through to a + // real upload / no-match. The hash gets populated the next + // time we observe a real version (a remote update, or a + // local edit that triggers an upload). + remoteHash, + localPath + }); + } + + this.queue.lastSeenUpdateId = remoteVersion.vaultUpdateId; + + if (localPath !== undefined) { + this.history.addHistoryEntry({ + status: SyncStatus.SUCCESS, + details: { + type: SyncType.CREATE, + relativePath: localPath + }, + message: + "Successfully downloaded remote file which hadn't existed locally", + author: remoteVersion.userId, + timestamp: new Date(remoteVersion.updatedDate) + }); + } + } + + private async canPlaceRemoteCreateAt( + target: RelativePath + ): Promise { + return ( + !this.queue.hasPendingCreateForPath(target) && + !(await this.operations.exists(target)) && + this.queue.getRecordByLocalPath(target) === undefined + ); + } + + private async sendUpdate({ + record, + relativePath, + contentBytes + }: { + record: DocumentRecord; + // `undefined` for content-only edits; the server keeps the doc's + // current path. A string is sent only on a user-driven rename. + relativePath: RelativePath | undefined; + contentBytes: Uint8Array; + }): Promise { + const isText = + !isBinary(contentBytes) && + isFileTypeMergable( + relativePath ?? 
record.remoteRelativePath, + (await this.serverConfig.getConfig()).mergeableFileExtensions + ); + + const cachedVersion = this.contentCache.get(record.parentVersionId); + + if (isText && cachedVersion !== undefined) { + return this.syncService.putText({ + documentId: record.documentId, + parentVersionId: record.parentVersionId, + relativePath, + content: diff( + new TextDecoder().decode(cachedVersion), + new TextDecoder().decode(contentBytes) + ) + }); + } + + return this.syncService.putBinary({ + documentId: record.documentId, + parentVersionId: record.parentVersionId, + relativePath, + contentBytes + }); + } + + private async updateCache( + updateId: VaultUpdateId, + contentBytes: Uint8Array, + filePath: RelativePath + ): Promise { + if ( + isFileTypeMergable( + filePath, + (await this.serverConfig.getConfig()).mergeableFileExtensions + ) && + !isBinary(contentBytes) + ) { + this.contentCache.put(updateId, contentBytes); + } + } + + private notifyRemainingOperationsChanged(): void { + const currentCount = this.queue.pendingUpdateCount; + if (this.previousRemainingOperationsCount !== currentCount) { + this.previousRemainingOperationsCount = currentCount; + this.onRemainingOperationsCountChanged.trigger(currentCount); + } } } diff --git a/frontend/sync-client/src/sync-operations/types.ts b/frontend/sync-client/src/sync-operations/types.ts new file mode 100644 index 00000000..80a64cd7 --- /dev/null +++ b/frontend/sync-client/src/sync-operations/types.ts @@ -0,0 +1,74 @@ +import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; + +export type VaultUpdateId = number; +export type DocumentId = string; +export type RelativePath = string; + +export interface DocumentRecord { + documentId: DocumentId; + parentVersionId: VaultUpdateId; + // Hash of the last server version this client has observed for the doc. 
+ // `undefined` means we have a record but haven't actually seen content + // yet — typically a remote-create whose target slot was occupied at + // receive time, where we deliberately defer the fetch to the reconciler. + // Consumers should treat undefined as "no comparison possible" (the + // fast-skip in `processLocalUpdate` falls through to a real upload). + remoteHash: string | undefined; + remoteRelativePath: RelativePath; + // Where the doc's file currently lives on disk. `undefined` means the doc + // has no local file yet — happens for a remote create whose + // `remoteRelativePath` slot was occupied at receive time. The reconciler + // will place the file once the slot frees, fetching content from the + // server on demand. + localPath: RelativePath | undefined; +} + +export interface StoredSyncState { + schemaVersion: number; + documents: DocumentRecord[] | undefined; + lastSeenUpdateId: VaultUpdateId | undefined; +} + +export enum SyncEventType { + LocalCreate = "local-create", + LocalUpdate = "local-update", // includes both content and path changes + LocalDelete = "local-delete", + RemoteChange = "remote-change" // includes every type of create/update/delete coming from the server +} + +export type FileSyncEvent = + | { type: SyncEventType.LocalCreate; path: RelativePath } + | { + type: SyncEventType.LocalUpdate; + path: RelativePath; + oldPath?: RelativePath; // oldPath is undefined for content changes + } + | { type: SyncEventType.LocalDelete; path: RelativePath } + | { + type: SyncEventType.RemoteChange; + remoteVersion: DocumentVersionWithoutContent; + }; + +export type SyncEvent = + | { + type: SyncEventType.LocalCreate; + path: RelativePath; // current path on disk; mutated in place by `updatePendingCreatePath` when the user renames mid-flight + isProcessing: boolean; // true once the wire loop has started this create; deletes after that must wait for the server ack + resolvers: PromiseWithResolvers; + } + | { + type: SyncEventType.LocalUpdate; 
+ documentId: DocumentId | Promise; // if it's a promise, the promise is fulfilled once the document's create event is processed + path: RelativePath; // current path on disk + originalPath: RelativePath; // original path on disk when the event was queued + isUserRename: boolean; // true iff this event was queued because the user renamed the file + } + | { + type: SyncEventType.LocalDelete; + documentId: DocumentId | Promise; // if it's a promise, the promise is fulfilled once the document's create event is processed + path: RelativePath; // only used for showing on the UI + } + | { + type: SyncEventType.RemoteChange; + remoteVersion: DocumentVersionWithoutContent; + }; diff --git a/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts b/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts deleted file mode 100644 index e3964d30..00000000 --- a/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts +++ /dev/null @@ -1,596 +0,0 @@ -import type { - Database, - DocumentRecord, - RelativePath -} from "../persistence/database"; - -import { diff } from "reconcile-text"; -import type { SyncService } from "../services/sync-service"; -import type { Logger } from "../tracing/logger"; -import type { - CommonHistoryEntry, - SyncCreateDetails, - SyncDeleteDetails, - SyncDetails, - SyncHistory, - SyncMovedDetails, - SyncUpdateDetails -} from "../tracing/sync-history"; -import { SyncStatus, SyncType } from "../tracing/sync-history"; -import { EMPTY_HASH, hash } from "../utils/hash"; - -import { base64ToBytes } from "byte-base64"; -import type { Settings } from "../persistence/settings"; -import type { FileOperations } from "../file-operations/file-operations"; -import { createPromise } from "../utils/create-promise"; -import { FileNotFoundError } from "../file-operations/file-not-found-error"; -import { SyncResetError } from "../services/sync-reset-error"; -import { globsToRegexes } from "../utils/globs-to-regexes"; -import type { DocumentVersion } 
from "../services/types/DocumentVersion"; -import type { DocumentUpdateResponse } from "../services/types/DocumentUpdateResponse"; -import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; -import type { FixedSizeDocumentCache } from "../utils/data-structures/fix-sized-cache"; -import { isFileTypeMergable } from "../utils/is-file-type-mergable"; -import { isBinary } from "../utils/is-binary"; -import type { ServerConfig } from "../services/server-config"; - -export class UnrestrictedSyncer { - private ignorePatterns: RegExp[]; - - public constructor( - private readonly logger: Logger, - private readonly database: Database, - private readonly settings: Settings, - private readonly syncService: SyncService, - private readonly operations: FileOperations, - private readonly history: SyncHistory, - private readonly contentCache: FixedSizeDocumentCache, - private readonly serverConfig: ServerConfig - ) { - this.ignorePatterns = globsToRegexes( - this.settings.getSettings().ignorePatterns, - this.logger - ); - - this.settings.onSettingsChanged.add((newSettings) => { - this.ignorePatterns = globsToRegexes( - newSettings.ignorePatterns, - this.logger - ); - }); - } - - public async unrestrictedSyncLocallyCreatedFile( - document: DocumentRecord - ): Promise { - const updateDetails: SyncCreateDetails = { - type: SyncType.CREATE, - relativePath: document.relativePath - }; - - return this.executeSync(updateDetails, async () => { - const originalRelativePath = document.relativePath; - if (document.isDeleted) { - this.logger.debug( - `Document ${originalRelativePath} has been already deleted, no need to create it` - ); - return; - } - - const contentBytes = - await this.operations.read(originalRelativePath); // this can throw FileNotFoundError - const contentHash = hash(contentBytes); - - const response = await this.syncService.create({ - documentId: document.documentId, - relativePath: originalRelativePath, - contentBytes - }); - - // 
In case a document with the same name (but different ID) had existed remotely that we haven't known about - if (response.relativePath != originalRelativePath) { - this.logger.debug( - `Document ${originalRelativePath} has been created remotely at a different path: ${response.relativePath}, moving it locally` - ); - await this.operations.move( - document.relativePath, - response.relativePath - ); // this can throw FileNotFoundError - } - - this.database.updateDocumentMetadata( - { - parentVersionId: response.vaultUpdateId, - hash: contentHash, - remoteRelativePath: response.relativePath - }, - document - ); - - this.database.addSeenUpdateId(response.vaultUpdateId); - await this.updateCache( - response.vaultUpdateId, - contentBytes, - response.relativePath - ); - - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: updateDetails, - message: `Successfully uploaded locally created file` - }); - }); - } - - public async unrestrictedSyncLocallyDeletedFile( - document: DocumentRecord - ): Promise { - const updateDetails: SyncDeleteDetails = { - type: SyncType.DELETE, - relativePath: document.relativePath - }; - - await this.executeSync(updateDetails, async () => { - const response = await this.syncService.delete({ - documentId: document.documentId, - relativePath: document.relativePath - }); - - this.database.updateDocumentMetadata( - { - parentVersionId: response.vaultUpdateId, - hash: EMPTY_HASH, - remoteRelativePath: document.relativePath - }, - document - ); - - this.database.addSeenUpdateId(response.vaultUpdateId); - - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: updateDetails, - message: `Successfully deleted locally deleted file on the server`, - author: response.userId - }); - }); - } - - public async unrestrictedSyncLocallyUpdatedFile({ - oldPath, - document, - // We use the same code path for both local and remote updates. 
We need to force the update - // if there are no local changes but we know that the remote version is newer. - force = false - }: { - oldPath?: RelativePath; - force?: boolean; - document: DocumentRecord; - }): Promise { - const updateDetails: SyncUpdateDetails | SyncMovedDetails = - oldPath !== undefined - ? { - type: SyncType.MOVE, - relativePath: document.relativePath, - movedFrom: oldPath - } - : { - type: SyncType.UPDATE, - relativePath: document.relativePath - }; - - await this.executeSync(updateDetails, async () => { - const originalRelativePath = document.relativePath; - - if (document.isDeleted || document.metadata === undefined) { - this.logger.debug( - `Document ${document.relativePath} has been already deleted, no need to update it` - ); - return; - } - - const contentBytes = await this.operations.read( - document.relativePath - ); // this can throw FileNotFoundError - let contentHash = hash(contentBytes); - - const areThereLocalChanges = !( - document.metadata.hash === contentHash && oldPath === undefined - ); - - let response: DocumentVersion | DocumentUpdateResponse | undefined = - undefined; - - if (areThereLocalChanges) { - const isText = - !isBinary(contentBytes) && - isFileTypeMergable( - document.relativePath, - (await this.serverConfig.getConfig()) - .mergeableFileExtensions - ); - const cachedVersion = this.contentCache.get( - document.metadata.parentVersionId - ); - - response = - isText && cachedVersion !== undefined - ? 
await this.syncService.putText({ - documentId: document.documentId, - parentVersionId: - document.metadata.parentVersionId, - relativePath: document.relativePath, - content: diff( - new TextDecoder().decode(cachedVersion), - new TextDecoder().decode(contentBytes) - ) - }) - : await this.syncService.putBinary({ - documentId: document.documentId, - parentVersionId: - document.metadata.parentVersionId, - relativePath: document.relativePath, - contentBytes - }); - } else { - if (!force) { - this.logger.debug( - `File hash of ${document.relativePath} matches with last synced version and the path hasn't changed; no need to sync` - ); - return; - } - - response = await this.syncService.get({ - documentId: document.documentId - }); - } - - // `document` is mutable and reflects the latest state in the local database - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition - if (document.isDeleted) { - this.logger.info( - `Document ${document.relativePath} has been deleted before we could finish updating it` - ); - this.database.addSeenUpdateId(response.vaultUpdateId); - return; - } - - if ( - // `Syncer` creates fake local document metadata for all remote docs with invalid hashes. The parent IDs will likely match - // the latest versions so we still need to update the local versions to turn the fakes into real metadata. 
- document.metadata.parentVersionId > response.vaultUpdateId - ) { - this.logger.debug( - `Document ${document.relativePath} is already more up to date than the fetched version` - ); - this.database.addSeenUpdateId(response.vaultUpdateId); // in case the previous `vaultUpdateId` update hasn't made it through - return; - } - - if (response.isDeleted) { - return this.applyRemoteDeleteLocally(document, response); - } - - let actualPath = document.relativePath; - - if (response.relativePath != originalRelativePath) { - actualPath = response.relativePath; - // Make sure to update the remote relative path to avoid uploading - // the file as a result of this filesystem event. - document.metadata.remoteRelativePath = response.relativePath; - await this.operations.move( - document.relativePath, - response.relativePath - ); // this can throw FileNotFoundError - } - - if (!("type" in response) || response.type === "MergingUpdate") { - const responseBytes = base64ToBytes(response.contentBase64); - contentHash = hash(responseBytes); - - this.database.updateDocumentMetadata( - { - parentVersionId: response.vaultUpdateId, - hash: contentHash, - remoteRelativePath: response.relativePath - }, - document - ); - await this.operations.write( - actualPath, - contentBytes, - responseBytes - ); - await this.updateCache( - response.vaultUpdateId, - responseBytes, - actualPath - ); - - if (!force) { - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: updateDetails, - message: `The file we updated had been updated remotely, so we downloaded the merged version` - }); - } - } else { - this.database.updateDocumentMetadata( - { - parentVersionId: response.vaultUpdateId, - hash: contentHash, - remoteRelativePath: response.relativePath - }, - document - ); - await this.updateCache( - response.vaultUpdateId, - contentBytes, - actualPath - ); - } - - this.database.addSeenUpdateId(response.vaultUpdateId); - - const actualUpdateDetails: SyncUpdateDetails | SyncMovedDetails = - 
oldPath !== undefined || - response.relativePath != originalRelativePath - ? { - type: SyncType.MOVE, - relativePath: response.relativePath, - movedFrom: originalRelativePath - } - : { - type: SyncType.UPDATE, - relativePath: response.relativePath - }; - - if (areThereLocalChanges) { - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: actualUpdateDetails, - message: `Successfully uploaded locally updated file to the server`, - author: response.userId - }); - } else { - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: actualUpdateDetails, - message: `Successfully downloaded remotely updated file from the server`, - author: response.userId, - timestamp: new Date(response.updatedDate) - }); - } - }); - } - - public async unrestrictedSyncRemotelyUpdatedFile( - remoteVersion: DocumentVersionWithoutContent, - document?: DocumentRecord - ): Promise { - const updateDetails: SyncCreateDetails = { - type: SyncType.CREATE, - relativePath: remoteVersion.relativePath - }; - - await this.executeSync(updateDetails, async () => { - if (document?.metadata !== undefined) { - // If the file exists locally, let's pretend the user has updated it - // and deal with remote update/deletion within `unrestrictedSyncLocallyUpdatedFile` - if ( - document.metadata.parentVersionId >= - remoteVersion.vaultUpdateId - ) { - this.logger.debug( - `Document ${remoteVersion.relativePath} is already at least as up to date as the fetched version` - ); - - return; - } - - return this.unrestrictedSyncLocallyUpdatedFile({ - document, - force: true - }); - } else if (remoteVersion.isDeleted) { - // Either the document hasn't made it to us before and therefore we don't need to delete it, - // or we already have it, in which case the preceeding if would've dealt with it - this.logger.debug( - `Document ${remoteVersion.relativePath} has been deleted remotely, no need to sync` - ); - return; - } - - // Don't download oversized files - const 
historyEntryForSkippedOversizedFile = - this.getHistoryEntryForSkippedOversizedFile( - remoteVersion.contentSize, - remoteVersion.relativePath - ); - if (historyEntryForSkippedOversizedFile !== undefined) { - this.history.addHistoryEntry( - historyEntryForSkippedOversizedFile - ); - return; - } - - const contentBytes = - await this.syncService.getDocumentVersionContent({ - documentId: remoteVersion.documentId, - vaultUpdateId: remoteVersion.vaultUpdateId - }); - - // We're trying to create an entirely new document that didn't exist locally - document = this.database.getDocumentByDocumentId( - remoteVersion.documentId - ); - // It can happen that a concurrent sync operation has already created the document, so we can bail here - if (document !== undefined) { - this.logger.debug( - `Document ${remoteVersion.relativePath} has already been created locally, no need to create it again` - ); - return; - } - - await this.operations.ensureClearPath(remoteVersion.relativePath); - - const [promise, resolve] = createPromise(); - this.database.updateDocumentMetadata( - { - parentVersionId: remoteVersion.vaultUpdateId, - hash: hash(contentBytes), - remoteRelativePath: remoteVersion.relativePath - }, - this.database.createNewPendingDocument( - remoteVersion.documentId, - remoteVersion.relativePath, - promise - ) - ); - - await this.operations.create( - remoteVersion.relativePath, - contentBytes - ); - await this.updateCache( - remoteVersion.vaultUpdateId, - contentBytes, - remoteVersion.relativePath - ); - - resolve(); - this.database.removeDocumentPromise(promise); - - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: updateDetails, - message: `Successfully downloaded remote file which hadn't existed locally`, - author: remoteVersion.userId, - timestamp: new Date(remoteVersion.updatedDate) - }); - }); - } - - public async executeSync( - details: SyncDetails, - fn: () => Promise - ): Promise { - for (const pattern of this.ignorePatterns) { - if 
(pattern.test(details.relativePath)) { - this.logger.debug( - `File '${details.relativePath}' is ignored by the ignore pattern: ${pattern}` - ); - return; // bail without SKIPPED status because we were told to ignore this file and we shouldn't clutter up the history - } - } - - try { - // Only check the size of files which already exist locally. - if (await this.operations.exists(details.relativePath)) { - const sizeInBytes = await this.operations.getFileSize( - details.relativePath - ); - const historyEntryForSkippedOversizedFile = - this.getHistoryEntryForSkippedOversizedFile( - sizeInBytes, - details.relativePath - ); - if (historyEntryForSkippedOversizedFile !== undefined) { - this.history.addHistoryEntry( - historyEntryForSkippedOversizedFile - ); - return; - } - } - - return await fn(); - } catch (e) { - if (e instanceof FileNotFoundError) { - // A subsequent sync operation must have been creating to deal with this - this.logger.info( - `Skiping file '${details.relativePath}' because it no longer exists when trying to ${details.type.toLocaleLowerCase()} it` - ); - return; - } - if (e instanceof SyncResetError) { - this.logger.info( - `Interrupting sync operation because of a reset` - ); - return; - } else { - this.history.addHistoryEntry({ - status: SyncStatus.ERROR, - details, - message: `Failed to sync file '${details.relativePath}' because of ${e} when trying to ${details.type.toLocaleLowerCase()} it` - }); - throw e; - } - } - } - - private getHistoryEntryForSkippedOversizedFile( - sizeInBytes: number, - relativePath: RelativePath - ): CommonHistoryEntry | undefined { - const sizeInMB = Math.round(sizeInBytes / 1024 / 1024); - const { maxFileSizeMB } = this.settings.getSettings(); - if (sizeInMB > maxFileSizeMB) { - return { - status: SyncStatus.SKIPPED, - details: { - type: SyncType.SKIPPED, - relativePath - }, - message: `File size of ${sizeInMB} MB exceeds the maximum file size limit of ${ - maxFileSizeMB - } MB` - }; - } - } - - private async 
updateCache( - updateId: number, - contentBytes: Uint8Array, - filePath: RelativePath - ): Promise { - if ( - isFileTypeMergable( - filePath, - (await this.serverConfig.getConfig()).mergeableFileExtensions - ) && - !isBinary(contentBytes) - ) { - this.contentCache.put(updateId, contentBytes); - } - } - - private async applyRemoteDeleteLocally( - document: DocumentRecord, - response: DocumentVersion | DocumentUpdateResponse - ): Promise { - this.history.addHistoryEntry({ - status: SyncStatus.SUCCESS, - details: { - type: SyncType.DELETE, - relativePath: document.relativePath - }, - message: "File has been deleted remotely, so we deleted it locally", - author: response.userId, - timestamp: new Date(response.updatedDate) - }); - - this.database.delete(document.relativePath); - this.database.updateDocumentMetadata( - { - parentVersionId: response.vaultUpdateId, - hash: EMPTY_HASH, - remoteRelativePath: response.relativePath - }, - document - ); - - await this.operations.delete(document.relativePath); - - this.database.addSeenUpdateId(response.vaultUpdateId); - } -} diff --git a/frontend/sync-client/src/tracing/sync-history.ts b/frontend/sync-client/src/tracing/sync-history.ts index 31f77283..c0d32032 100644 --- a/frontend/sync-client/src/tracing/sync-history.ts +++ b/frontend/sync-client/src/tracing/sync-history.ts @@ -2,7 +2,7 @@ import { MAX_HISTORY_ENTRY_COUNT, TIMEOUT_FOR_MERGING_HISTORY_ENTRIES_IN_SECONDS } from "../consts"; -import type { RelativePath } from "../persistence/database"; +import type { RelativePath } from "../sync-operations/types"; import type { Logger } from "./logger"; import { removeFromArray } from "../utils/remove-from-array"; import { EventListeners } from "../utils/data-structures/event-listeners"; @@ -28,7 +28,7 @@ export interface SyncDeleteDetails { relativePath: RelativePath; } -export interface SyncSkippedDetails { +interface SyncSkippedDetails { type: SyncType.SKIPPED; relativePath: RelativePath; } @@ -40,12 +40,15 @@ export type 
SyncDetails = | SyncMovedDetails | SyncSkippedDetails; -export interface CommonHistoryEntry { +export interface HistoryEntry { status: SyncStatus; message: string; details: SyncDetails; + timestamp: Date; + // `author` is the server-side user id and only exists for entries that + // round-tripped through the server. Local-only entries (e.g. SKIPPED) + // legitimately have no author. author?: string; - timestamp?: Date; } export enum SyncType { @@ -62,8 +65,6 @@ export enum SyncStatus { SKIPPED = "SKIPPED" } -export type HistoryEntry = CommonHistoryEntry & { timestamp: Date }; - export interface HistoryStats { success: number; error: number; @@ -88,30 +89,25 @@ export class SyncHistory { } /** - * Insert the entry at the beginning of the history list. If the entry - * already in the list, it will get moved to the beginning and updated. - * - * If the entry list is too long, the oldest entry will be removed. - */ - public addHistoryEntry(entry: CommonHistoryEntry): void { - const historyEntry = { - ...entry, - timestamp: entry.timestamp ?? new Date() - }; - - const candidate = this.findSimilarRecentUpdateEntry(historyEntry); + * Insert the entry at the beginning of the history list. If the entry + * already in the list, it will get moved to the beginning and updated. + * + * If the entry list is too long, the oldest entry will be removed. 
+ */ + public addHistoryEntry(entry: HistoryEntry): void { + const candidate = this.findSimilarRecentUpdateEntry(entry); if (candidate !== undefined) { removeFromArray(this._entries, candidate); } // Insert the entry at the beginning - this._entries.unshift(historyEntry); + this._entries.unshift(entry); if (this._entries.length > MAX_HISTORY_ENTRY_COUNT) { this._entries.pop(); } - this.updateSuccessCount(historyEntry); + this.updateSuccessCount(entry); } public reset(): void { diff --git a/frontend/sync-client/tsconfig.json b/frontend/sync-client/tsconfig.json index 92caf072..98870f32 100644 --- a/frontend/sync-client/tsconfig.json +++ b/frontend/sync-client/tsconfig.json @@ -12,7 +12,5 @@ "declaration": true, "declarationDir": "./dist/types" }, - "exclude": [ - "./dist" - ] + "exclude": ["./dist"] } diff --git a/frontend/sync-client/webpack.config.js b/frontend/sync-client/webpack.config.js index b7c3a3fd..413bfeba 100644 --- a/frontend/sync-client/webpack.config.js +++ b/frontend/sync-client/webpack.config.js @@ -49,11 +49,6 @@ module.exports = [ type: "umd" }, globalObject: "this" - }, - resolve: { - fallback: { - ws: false // Exclude `ws` from the browser bundle - } } }), merge(common, { @@ -62,10 +57,6 @@ module.exports = [ path: path.resolve(__dirname, "dist"), filename: "sync-client.node.js", libraryTarget: "commonjs2" - }, - externals: { - bufferutil: "bufferutil", - "utf-8-validate": "utf-8-validate" // required for ws: https://github.com/websockets/ws/issues/2245#issuecomment-2250318733 } }) ];