From be1635c26e995d9ca86247307626dcd69247c31c Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 16 Nov 2025 22:10:22 +0000 Subject: [PATCH] Improve network usage for small text changes (#166) --- frontend/eslint.config.mjs | 7 +- frontend/package-lock.json | 21 +- frontend/sync-client/package.json | 2 +- .../src/file-operations/file-operations.ts | 3 +- .../sync-client/src/services/sync-service.ts | 76 +++++- .../types/UpdateTextDocumentVersion.ts | 7 + frontend/sync-client/src/sync-client.ts | 9 +- .../sync-client/src/sync-operations/syncer.ts | 5 +- .../sync-operations/unrestricted-syncer.ts | 71 +++++- .../src/utils/fix-sized-cache.test.ts | 239 ++++++++++++++++++ .../sync-client/src/utils/fix-sized-cache.ts | 113 +++++++++ frontend/sync-client/src/utils/is-binary.ts | 16 ++ sync-server/Cargo.lock | 7 +- sync-server/Cargo.toml | 2 +- sync-server/src/server.rs | 8 +- sync-server/src/server/requests.rs | 20 +- sync-server/src/server/update_document.rs | 101 ++++++-- sync-server/src/utils.rs | 1 + sync-server/src/utils/is_binary.rs | 26 ++ sync-server/src/utils/rotating_file_writer.rs | 25 +- 20 files changed, 697 insertions(+), 62 deletions(-) create mode 100644 frontend/sync-client/src/services/types/UpdateTextDocumentVersion.ts create mode 100644 frontend/sync-client/src/utils/fix-sized-cache.test.ts create mode 100644 frontend/sync-client/src/utils/fix-sized-cache.ts create mode 100644 frontend/sync-client/src/utils/is-binary.ts create mode 100644 sync-server/src/utils/is_binary.rs diff --git a/frontend/eslint.config.mjs b/frontend/eslint.config.mjs index db648d46..8e13be78 100644 --- a/frontend/eslint.config.mjs +++ b/frontend/eslint.config.mjs @@ -33,12 +33,7 @@ export default [ "@typescript-eslint/class-methods-use-this": "off", "@typescript-eslint/consistent-return": "off", "@typescript-eslint/no-unsafe-argument": "off", - "@typescript-eslint/max-params": [ - "error", - { - max: 6 - } - ], + "@typescript-eslint/max-params": "off", "@typescript-eslint/no-magic-numbers": "off", "@typescript-eslint/prefer-readonly-parameter-types": "off", "@typescript-eslint/naming-convention": "off", diff --git a/frontend/package-lock.json b/frontend/package-lock.json index d1497401..31dec1fb 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1583,7 +1583,9 @@ } }, "node_modules/brace-expansion": { - "version": "1.1.11", + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", "dev": true, "license": "MIT", "dependencies": { @@ -2742,7 +2744,9 @@ } }, "node_modules/js-yaml": { - "version": "4.1.0", + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", "dev": true, "license": "MIT", "dependencies": { @@ -3487,6 +3491,7 @@ "version": "0.5.0", "resolved": "https://registry.npmjs.org/reconcile-text/-/reconcile-text-0.5.0.tgz", "integrity": "sha512-zki3lqw9Oxdhm9ZvDN17VyYoL1Isc8BEL07ILVDE2yGfNEI7thrkczoNCUr+hkFU2rzZtfxECTG0b7p61AJ6wg==", + "dev": true, "license": "MIT" }, "node_modules/regex-parser": { @@ -4687,7 +4692,7 @@ "byte-base64": "^1.1.0", "minimatch": "^10.0.1", "p-queue": "^8.1.0", - "reconcile-text": "^0.5.0", + "reconcile-text": "^0.7.1", "uuid": "^13.0.0" }, "devDependencies": { @@ -4703,7 +4708,9 @@ } }, "sync-client/node_modules/brace-expansion": { - "version": "2.0.1", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", "license": "MIT", "dependencies": { "balanced-match": "^1.0.0" @@ -4722,6 +4729,12 @@ "url": "https://github.com/sponsors/isaacs" } }, + "sync-client/node_modules/reconcile-text": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/reconcile-text/-/reconcile-text-0.7.1.tgz", + "integrity": "sha512-khedcYvAKs7ELKh5Z8mz2vyomMY5TqznV1dB+k/7qUAX9cheMNN5/EPJVQYZepOMunYbnQitvhFJX3kD4IMcNw==", + "license": "MIT" + }, "test-client": { "version": "0.9.2", "bin": { diff --git a/frontend/sync-client/package.json b/frontend/sync-client/package.json index 6aa803cf..6483c93c 100644 --- a/frontend/sync-client/package.json +++ b/frontend/sync-client/package.json @@ -16,7 +16,7 @@ "byte-base64": "^1.1.0", "minimatch": "^10.0.1", "p-queue": "^8.1.0", - "reconcile-text": "^0.5.0", + "reconcile-text": "^0.7.1", "uuid": "^13.0.0" }, "devDependencies": { diff --git a/frontend/sync-client/src/file-operations/file-operations.ts b/frontend/sync-client/src/file-operations/file-operations.ts index 56ce0e51..e85c7fda 100644 --- a/frontend/sync-client/src/file-operations/file-operations.ts +++ b/frontend/sync-client/src/file-operations/file-operations.ts @@ -3,8 +3,9 @@ import type { FileSystemOperations } from "./filesystem-operations"; import type { Database, RelativePath } from "../persistence/database"; import { SafeFileSystemOperations } from "./safe-filesystem-operations"; import type { TextWithCursors } from "reconcile-text"; -import { isBinary, reconcile } from "reconcile-text"; +import { reconcile } from "reconcile-text"; import { isFileTypeMergable } from "../utils/is-file-type-mergable"; +import { isBinary } from "../utils/is-binary"; export class FileOperations { private static readonly PARENTHESES_REGEX = / \((\d+)\)$/; diff --git a/frontend/sync-client/src/services/sync-service.ts b/frontend/sync-client/src/services/sync-service.ts index 8ce9c56a..5bbf01e6 100644 --- a/frontend/sync-client/src/services/sync-service.ts +++ b/frontend/sync-client/src/services/sync-service.ts @@ -16,6 +16,7 @@ import type { DocumentVersion } from "./types/DocumentVersion"; import type { FetchLatestDocumentsResponse } from "./types/FetchLatestDocumentsResponse"; import type { PingResponse } from "./types/PingResponse"; import type { DeleteDocumentVersion } from "./types/DeleteDocumentVersion"; +import type { UpdateTextDocumentVersion } from "./types/UpdateTextDocumentVersion"; export interface CheckConnectionResult { isSuccessful: boolean; @@ -102,7 +103,59 @@ export class SyncService { }); } - public async put({ + public async putText({ + parentVersionId, + documentId, + relativePath, + content + }: { + parentVersionId: VaultUpdateId; + documentId: DocumentId; + relativePath: RelativePath; + content: (number | string)[]; + }): Promise { + return this.withRetries(async () => { + this.logger.debug( + `Updating text document ${documentId} with parent version ${parentVersionId} and relative path ${relativePath}` + ); + + const request: UpdateTextDocumentVersion = { + parentVersionId, + relativePath, + content + }; + + const response = await this.client( + this.getUrl(`/documents/${documentId}/text`), + { + method: "PUT", + body: JSON.stringify(request), + headers: this.getDefaultHeaders({ type: "json" }) + } + ); + + const result: SerializedError | DocumentUpdateResponse = + (await response.json()) as // eslint-disable-line @typescript-eslint/no-unsafe-type-assertion + | SerializedError + | DocumentUpdateResponse; + + if ("errorType" in result) { + throw new Error( + `Failed to update document: ${SyncService.formatError(result)}` + ); + } + + this.logger.debug( + `Updated document ${JSON.stringify(result)} with id ${ + result.documentId + }}` + ); + + return result; + }); + } + + public async putBinary({ parentVersionId, documentId, relativePath, @@ -115,7 +168,7 @@ export class SyncService { }): Promise { return this.withRetries(async () => { this.logger.debug( - `Updating document ${documentId} with parent version ${parentVersionId} and relative path ${relativePath}` + `Updating binary document ${documentId} with parent version ${parentVersionId} and relative path ${relativePath}` ); const formData = new FormData(); formData.append("parent_version_id", parentVersionId.toString()); @@ -126,7 +179,7 @@ export class SyncService { ); const response = await this.client( - this.getUrl(`/documents/${documentId}`), + this.getUrl(`/documents/${documentId}/binary`), { method: "PUT", body: formData, @@ -171,10 +224,7 @@ export class SyncService { { method: "DELETE", body: JSON.stringify(request), - headers: { - "Content-Type": "application/json", - ...this.getDefaultHeaders() - } + headers: this.getDefaultHeaders({ type: "json" }) } ); @@ -297,11 +347,19 @@ export class SyncService { return `${safeRemoteUri}/vaults/${vaultName}${path}`; } - private getDefaultHeaders(): Record { - return { + private getDefaultHeaders( + { type }: { type?: "json" } = { type: undefined } + ): Record { + const headers: Record = { "device-id": this.deviceId, authorization: `Bearer ${this.settings.getSettings().token}` }; + + if (type === "json") { + headers["Content-Type"] = "application/json"; + } + + return headers; } private async withRetries(fn: () => Promise): Promise { diff --git a/frontend/sync-client/src/services/types/UpdateTextDocumentVersion.ts b/frontend/sync-client/src/services/types/UpdateTextDocumentVersion.ts new file mode 100644 index 00000000..b3a5499b --- /dev/null +++ b/frontend/sync-client/src/services/types/UpdateTextDocumentVersion.ts @@ -0,0 +1,7 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +export interface UpdateTextDocumentVersion { + parentVersionId: number; + relativePath: string; + content: (number | string)[]; +} diff --git a/frontend/sync-client/src/sync-client.ts b/frontend/sync-client/src/sync-client.ts index 78beb910..33a1cac5 100644 --- a/frontend/sync-client/src/sync-client.ts +++ b/frontend/sync-client/src/sync-client.ts @@ -21,13 +21,13 @@ import { CursorTracker } from "./sync-operations/cursor-tracker"; import type { CursorSpan } from "./services/types/CursorSpan"; import type { MaybeOutdatedClientCursors } from "./types/maybe-outdated-client-cursors"; import { FileChangeNotifier } from "./sync-operations/file-change-notifier"; +import { FixedSizeDocumentCache } from "./utils/fix-sized-cache"; export class SyncClient { private static readonly MINIMUM_SAVE_INTERVAL_MS = 1000; private hasStartedOfflineSync = false; private hasFinishedOfflineSync = false; - // eslint-disable-next-line @typescript-eslint/max-params private constructor( private readonly history: SyncHistory, private readonly settings: Settings, @@ -135,13 +135,15 @@ export class SyncClient { nativeLineEndings ); + const contentCache = new FixedSizeDocumentCache(1024 * 1024 * 2); // 2 MB cache const unrestrictedSyncer = new UnrestrictedSyncer( logger, database, settings, syncService, fileOperations, - history + history, + contentCache ); const syncer = new Syncer( @@ -150,7 +152,8 @@ export class SyncClient { settings, syncService, fileOperations, - unrestrictedSyncer + unrestrictedSyncer, + contentCache ); const webSocketManager = new WebSocketManager( diff --git a/frontend/sync-client/src/sync-operations/syncer.ts b/frontend/sync-client/src/sync-operations/syncer.ts index 03041a36..1c8ac36e 100644 --- a/frontend/sync-client/src/sync-operations/syncer.ts +++ b/frontend/sync-client/src/sync-operations/syncer.ts @@ -17,6 +17,7 @@ import { createPromise } from "../utils/create-promise"; import { SyncResetError } from "../services/sync-reset-error"; import { Locks } from "../utils/locks"; import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; +import type { FixedSizeDocumentCache } from "../utils/fix-sized-cache"; export class Syncer { private readonly remoteDocumentsLock: Locks; @@ -33,7 +34,8 @@ export class Syncer { settings: Settings, private readonly syncService: SyncService, private readonly operations: FileOperations, - private readonly internalSyncer: UnrestrictedSyncer + private readonly internalSyncer: UnrestrictedSyncer, + private readonly contentCache: FixedSizeDocumentCache ) { this.syncQueue = new PQueue({ concurrency: settings.getSettings().syncConcurrency @@ -250,6 +252,7 @@ export class Syncer { public async reset(): Promise { await this.waitUntilFinished(); + this.contentCache.clear(); } public async syncRemotelyUpdatedFile( diff --git a/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts b/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts index 1f7e908c..f9f6e2c1 100644 --- a/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts +++ b/frontend/sync-client/src/sync-operations/unrestricted-syncer.ts @@ -4,6 +4,7 @@ import type { RelativePath } from "../persistence/database"; +import { diff } from "reconcile-text"; import type { SyncService } from "../services/sync-service"; import type { Logger } from "../tracing/logger"; import type { @@ -27,6 +28,9 @@ import { globsToRegexes } from "../utils/globs-to-regexes"; import type { DocumentVersion } from "../services/types/DocumentVersion"; import type { DocumentUpdateResponse } from "../services/types/DocumentUpdateResponse"; import type { DocumentVersionWithoutContent } from "../services/types/DocumentVersionWithoutContent"; +import type { FixedSizeDocumentCache } from "../utils/fix-sized-cache"; +import { isFileTypeMergable } from "../utils/is-file-type-mergable"; +import { isBinary } from "../utils/is-binary"; export class UnrestrictedSyncer { private ignorePatterns: RegExp[]; @@ -37,7 +41,8 @@ export class UnrestrictedSyncer { private readonly settings: Settings, private readonly syncService: SyncService, private readonly operations: FileOperations, - private readonly history: SyncHistory + private readonly history: SyncHistory, + private readonly contentCache: FixedSizeDocumentCache ) { this.ignorePatterns = globsToRegexes( this.settings.getSettings().ignorePatterns, @@ -87,8 +92,12 @@ export class UnrestrictedSyncer { }, document ); - this.database.addSeenUpdateId(response.vaultUpdateId); + this.updateCache( + response.vaultUpdateId, + contentBytes, + response.relativePath + ); this.history.addHistoryEntry({ status: SyncStatus.SUCCESS, @@ -178,12 +187,32 @@ export class UnrestrictedSyncer { undefined; if (areThereLocalChanges) { - response = await this.syncService.put({ - documentId: document.documentId, - parentVersionId: document.metadata.parentVersionId, - relativePath: document.relativePath, - contentBytes - }); + const isText = + !isBinary(contentBytes) && + isFileTypeMergable(document.relativePath); + const cachedVersion = this.contentCache.get( + document.metadata.parentVersionId + ); + + response = + isText && cachedVersion !== undefined + ? await this.syncService.putText({ + documentId: document.documentId, + parentVersionId: + document.metadata.parentVersionId, + relativePath: document.relativePath, + content: diff( + new TextDecoder().decode(cachedVersion), + new TextDecoder().decode(contentBytes) + ) + }) + : await this.syncService.putBinary({ + documentId: document.documentId, + parentVersionId: + document.metadata.parentVersionId, + relativePath: document.relativePath, + contentBytes + }); } else { if (!force) { this.logger.debug( @@ -274,12 +303,16 @@ export class UnrestrictedSyncer { }, document ); - await this.operations.write( actualPath, contentBytes, responseBytes ); + this.updateCache( + response.vaultUpdateId, + responseBytes, + actualPath + ); if (!force) { this.history.addHistoryEntry({ @@ -297,6 +330,11 @@ export class UnrestrictedSyncer { }, document ); + this.updateCache( + response.vaultUpdateId, + contentBytes, + actualPath + ); } this.database.addSeenUpdateId(response.vaultUpdateId); @@ -423,6 +461,11 @@ export class UnrestrictedSyncer { remoteVersion.relativePath, contentBytes ); + this.updateCache( + remoteVersion.vaultUpdateId, + contentBytes, + remoteVersion.relativePath + ); resolve(); this.database.removeDocumentPromise(promise); @@ -513,4 +556,14 @@ export class UnrestrictedSyncer { }; } } + + private updateCache( + updateId: number, + contentBytes: Uint8Array, + filePath: RelativePath + ): void { + if (isFileTypeMergable(filePath) && !isBinary(contentBytes)) { + this.contentCache.put(updateId, contentBytes); + } + } } diff --git a/frontend/sync-client/src/utils/fix-sized-cache.test.ts b/frontend/sync-client/src/utils/fix-sized-cache.test.ts new file mode 100644 index 00000000..46bc4144 --- /dev/null +++ b/frontend/sync-client/src/utils/fix-sized-cache.test.ts @@ -0,0 +1,239 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import { FixedSizeDocumentCache } from "./fix-sized-cache"; + +describe("fixedSizeDocumentCache", () => { + it("happyPath", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + const doc3 = new Uint8Array([5, 6]); + + cache.put(1, doc1); + assert.equal(cache.get(1), doc1); + cache.put(2, doc2); + assert.equal(cache.get(1), doc1); + assert.equal(cache.get(2), doc2); + cache.put(3, doc3); + assert.equal(cache.get(1), undefined); + assert.equal(cache.get(2), doc2); + assert.equal(cache.get(3), doc3); + }); + + it("updateExistingEntry", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1_v1 = new Uint8Array([1, 2]); + const doc1_v2 = new Uint8Array([3, 4]); + const doc2 = new Uint8Array([5, 6]); + + cache.put(1, doc1_v1); + assert.equal(cache.get(1), doc1_v1); + cache.put(2, doc2); + assert.equal(cache.get(1), doc1_v1); + assert.equal(cache.get(2), doc2); + cache.put(1, doc1_v2); // Update doc1 + assert.equal(cache.get(1), doc1_v2); + assert.equal(cache.get(2), doc2); + }); + + it("evictOldestEntry", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + const doc3 = new Uint8Array([5, 6]); + + cache.put(1, doc1); + cache.put(2, doc2); + assert.equal(cache.get(2), doc2); + assert.equal(cache.get(1), doc1); + cache.put(3, doc3); + assert.equal(cache.get(1), doc1); + assert.equal(cache.get(2), undefined); + assert.equal(cache.get(3), doc3); + }); + + it("tooLargeEntry", async () => { + const cache = new FixedSizeDocumentCache(2); + const doc1 = new Uint8Array([1, 2, 3]); + + cache.put(1, doc1); + assert.equal(cache.get(1), undefined); + }); + + it("multipleEvictionsInSinglePut", async () => { + const cache = new FixedSizeDocumentCache(10); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + const doc3 = new Uint8Array([5, 6]); + const doc4 = new Uint8Array([7, 8, 9, 10, 11, 12, 13, 14]); // 8 bytes + + cache.put(1, doc1); + cache.put(2, doc2); + cache.put(3, doc3); + // Cache now has 6 bytes total + + cache.put(4, doc4); // Should evict doc1 and doc2 to make room (total: 2+8=10) + assert.equal(cache.get(1), undefined); // Evicted + assert.equal(cache.get(2), undefined); // Evicted + assert.equal(cache.get(3), doc3); // Still present + assert.equal(cache.get(4), doc4); + }); + + it("clearCache", async () => { + const cache = new FixedSizeDocumentCache(10); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + + cache.put(1, doc1); + cache.put(2, doc2); + assert.equal(cache.get(1), doc1); + assert.equal(cache.get(2), doc2); + + cache.clear(); + assert.equal(cache.get(1), undefined); + assert.equal(cache.get(2), undefined); + + // Should be able to add entries after clear + cache.put(3, doc1); + assert.equal(cache.get(3), doc1); + }); + + it("getNonExistentKey", async () => { + const cache = new FixedSizeDocumentCache(10); + const doc1 = new Uint8Array([1, 2]); + cache.put(1, doc1); + assert.equal(cache.get(999), undefined); + }); + + it("updateEntryWithDifferentSizeTriggeringEviction", async () => { + const cache = new FixedSizeDocumentCache(6); + const doc1_v1 = new Uint8Array([1, 2]); + const doc1_v2 = new Uint8Array([1, 2, 3, 4]); // Larger version + const doc2 = new Uint8Array([5, 6]); + const doc3 = new Uint8Array([7, 8]); + + cache.put(1, doc1_v1); + cache.put(2, doc2); + cache.put(3, doc3); + + // Update doc1 with larger version, should evict doc2 + cache.put(1, doc1_v2); + + assert.equal(cache.get(1), doc1_v2); + assert.equal(cache.get(2), undefined); // Evicted + assert.equal(cache.get(3), doc3); + }); + + it("singleItemCache", async () => { + const cache = new FixedSizeDocumentCache(2); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + + cache.put(1, doc1); + assert.equal(cache.get(1), doc1); + + cache.put(2, doc2); + assert.equal(cache.get(1), undefined); // Evicted + assert.equal(cache.get(2), doc2); + }); + + it("multipleGetsOnSameEntry", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + const doc3 = new Uint8Array([5, 6]); + + cache.put(1, doc1); + cache.put(2, doc2); + + // Multiple gets on doc1 + cache.get(1); + cache.get(1); + cache.get(1); + + // Order should be: 2 (LRU), 1 (MRU) + cache.put(3, doc3); + + assert.equal(cache.get(1), doc1); + assert.equal(cache.get(2), undefined); // Evicted + assert.equal(cache.get(3), doc3); + }); + + it("exactlySizedEntry", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1 = new Uint8Array([1, 2, 3, 4]); // Exactly cache size + + cache.put(1, doc1); + assert.equal(cache.get(1), doc1); + + const doc2 = new Uint8Array([5, 6]); + cache.put(2, doc2); + + // doc1 should be evicted to make room for doc2 + assert.equal(cache.get(1), undefined); + assert.equal(cache.get(2), doc2); + }); + + it("updateEntryMakesItMostRecent", async () => { + const cache = new FixedSizeDocumentCache(6); + const doc1_v1 = new Uint8Array([1, 2]); + const doc1_v2 = new Uint8Array([3, 4]); + const doc2 = new Uint8Array([5, 6]); + const doc3 = new Uint8Array([7, 8]); + const doc4 = new Uint8Array([9, 10]); + + cache.put(1, doc1_v1); + cache.put(2, doc2); + cache.put(3, doc3); + + // Update doc1 (should move it to most recent) + cache.put(1, doc1_v2); + + // Order should be: 2 (LRU), 3, 1 (MRU) + // Adding doc4 should evict doc2 + cache.put(4, doc4); + + assert.equal(cache.get(1), doc1_v2); + assert.equal(cache.get(2), undefined); // Evicted + assert.equal(cache.get(3), doc3); + assert.equal(cache.get(4), doc4); + }); + + it("alternatingAccessPattern", async () => { + const cache = new FixedSizeDocumentCache(4); + const doc1 = new Uint8Array([1, 2]); + const doc2 = new Uint8Array([3, 4]); + const doc3 = new Uint8Array([5, 6]); + + cache.put(1, doc1); + cache.put(2, doc2); + + // Alternate access between doc1 and doc2 + cache.get(1); + cache.get(2); + cache.get(1); + cache.get(2); + + // Order should be: 1, 2 (MRU) + cache.put(3, doc3); + + assert.equal(cache.get(1), undefined); // Evicted + assert.equal(cache.get(2), doc2); + assert.equal(cache.get(3), doc3); + }); + + it("zeroByteDocs", async () => { + const cache = new FixedSizeDocumentCache(2); + const doc1 = new Uint8Array([]); + const doc2 = new Uint8Array([]); + const doc3 = new Uint8Array([1, 2]); + + cache.put(1, doc1); + cache.put(2, doc2); + cache.put(3, doc3); + + assert.equal(cache.get(1), doc1); + assert.equal(cache.get(2), doc2); + assert.equal(cache.get(3), doc3); + }); +}); diff --git a/frontend/sync-client/src/utils/fix-sized-cache.ts b/frontend/sync-client/src/utils/fix-sized-cache.ts new file mode 100644 index 00000000..7adee7b0 --- /dev/null +++ b/frontend/sync-client/src/utils/fix-sized-cache.ts @@ -0,0 +1,113 @@ +// Implements an in-memory fixed-size cache for document contents, + +import type { VaultUpdateId } from "../persistence/database"; + +// Doubly-linked list node for O(1) LRU operations +class LRUNode { + public constructor( + public key: VaultUpdateId, + public value: Uint8Array, + public prev: LRUNode | null = null, + public next: LRUNode | null = null + ) {} +} + +// evicting the least recently used documents when the size limit is exceeded. +export class FixedSizeDocumentCache { + private readonly maxSizeInBytes: number; + private currentSizeInBytes: number; + private readonly cache: Map; + private head: LRUNode | null; // Least recently used + private tail: LRUNode | null; // Most recently used + + public constructor(maxSizeInBytes: number) { + this.maxSizeInBytes = maxSizeInBytes; + this.currentSizeInBytes = 0; + this.cache = new Map(); + this.head = null; + this.tail = null; + } + + public get(updateId: VaultUpdateId): Uint8Array | undefined { + const node = this.cache.get(updateId); + if (node) { + this.moveToTail(node); + return node.value; + } + + return undefined; + } + + public put(updateId: VaultUpdateId, content: Uint8Array): void { + if (content.byteLength > this.maxSizeInBytes) { + // Document is too large to fit in the cache + return; + } + + // If the document is already in the cache, update it + const existingNode = this.cache.get(updateId); + if (existingNode != null) { + this.currentSizeInBytes -= existingNode.value.byteLength; + this.removeNode(existingNode); + this.cache.delete(updateId); + } + + const newNode = new LRUNode(updateId, content); + this.cache.set(updateId, newNode); + this.addToTail(newNode); + this.currentSizeInBytes += content.byteLength; + + // Evict least recently used documents if over size limit + while (this.currentSizeInBytes > this.maxSizeInBytes && this.head) { + const lruNode = this.head; + this.removeNode(lruNode); + this.cache.delete(lruNode.key); + this.currentSizeInBytes -= lruNode.value.byteLength; + } + } + + public clear(): void { + this.cache.clear(); + this.head = null; + this.tail = null; + this.currentSizeInBytes = 0; + } + + private removeNode(node: LRUNode): void { + if (node.prev) { + node.prev.next = node.next; + } else { + this.head = node.next; + } + + if (node.next) { + node.next.prev = node.prev; + } else { + this.tail = node.prev; + } + + node.prev = null; + node.next = null; + } + + private addToTail(node: LRUNode): void { + node.prev = this.tail; + node.next = null; + + if (this.tail) { + this.tail.next = node; + } + + this.tail = node; + + this.head ??= node; + } + + private moveToTail(node: LRUNode): void { + if (node === this.tail) { + return; + } + this.removeNode(node); + this.addToTail(node); + } +} diff --git a/frontend/sync-client/src/utils/is-binary.ts b/frontend/sync-client/src/utils/is-binary.ts new file mode 100644 index 00000000..9e2de954 --- /dev/null +++ b/frontend/sync-client/src/utils/is-binary.ts @@ -0,0 +1,16 @@ +// Text is unlikely to contain null bytes, so we can use that to distinguish binary files. +export function isBinary(content: Uint8Array): boolean { + for (const byte of content) { + if (byte === 0) { + return true; + } + } + + try { + new TextDecoder("utf-8", { fatal: true }).decode(content); + } catch { + return true; + } + + return false; +} diff --git a/sync-server/Cargo.lock b/sync-server/Cargo.lock index c0a05a3c..5fed1ff9 100644 --- a/sync-server/Cargo.lock +++ b/sync-server/Cargo.lock @@ -1680,9 +1680,12 @@ dependencies = [ [[package]] name = "reconcile-text" -version = "0.5.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d690c19b0bf6574cd3591d10f20df5aa52d2af95b8dcaacbc86893292ac8c5" +checksum = "913440a3c2b90cd3ed3e967660f2bb624b71e8059b9fc86960a5f91bd1e2e353" +dependencies = [ + "serde", +] [[package]] name = "redox_syscall" diff --git a/sync-server/Cargo.toml b/sync-server/Cargo.toml index 816d571c..575dd296 100644 --- a/sync-server/Cargo.toml +++ b/sync-server/Cargo.toml @@ -35,7 +35,7 @@ bimap = "0.6.3" ts-rs = { version = "10.1", features = ["uuid-impl", "chrono-impl"] } serde_with = "3.15.1" base64 = "0.22.1" -reconcile-text = "0.5.0" +reconcile-text = { version = "0.7.1", features = ["serde"] } [profile.release] codegen-units = 1 diff --git a/sync-server/src/server.rs b/sync-server/src/server.rs index f63ef551..a5506683 100644 --- a/sync-server/src/server.rs +++ b/sync-server/src/server.rs @@ -117,8 +117,12 @@ fn get_authed_routes(app_state: AppState) -> Router { get(fetch_latest_document_version::fetch_latest_document_version), ) .route( - "/vaults/:vault_id/documents/:document_id", - put(update_document::update_document), + "/vaults/:vault_id/documents/:document_id/binary", + put(update_document::update_binary), + ) + .route( + "/vaults/:vault_id/documents/:document_id/text", + put(update_document::update_text), ) .route( "/vaults/:vault_id/documents/:document_id/versions/:version_id", diff --git a/sync-server/src/server/requests.rs b/sync-server/src/server/requests.rs index 9d1e478b..2e956544 100644 --- a/sync-server/src/server/requests.rs +++ b/sync-server/src/server/requests.rs @@ -1,5 +1,6 @@ use axum::body::Bytes; use axum_typed_multipart::{FieldData, TryFromMultipart}; +use reconcile_text::NumberOrString; use serde::{self, Deserialize}; use ts_rs::TS; @@ -20,17 +21,28 @@ pub struct CreateDocumentVersion { pub content: FieldData, } -#[derive(TS, Debug, TryFromMultipart)] -#[ts(export)] -pub struct UpdateDocumentVersion { +#[derive(Debug, TryFromMultipart)] +pub struct UpdateBinaryDocumentVersion { pub parent_version_id: VaultUpdateId, pub relative_path: String, - #[ts(as = "Vec")] #[form_data(limit = "unlimited")] pub content: FieldData, } +#[derive(TS, Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +#[ts(export)] +pub struct UpdateTextDocumentVersion { + #[ts(as = "i32")] + pub parent_version_id: VaultUpdateId, + + pub relative_path: String, + + #[ts(type = "Array")] + pub content: Vec, +} + #[derive(TS, Debug, Deserialize)] #[serde(rename_all = "camelCase")] #[ts(export)] diff --git a/sync-server/src/server/update_document.rs b/sync-server/src/server/update_document.rs index bf11504c..cb81361b 100644 --- a/sync-server/src/server/update_document.rs +++ b/sync-server/src/server/update_document.rs @@ -6,23 +6,25 @@ use axum::{ use axum_extra::TypedHeader; use axum_typed_multipart::TypedMultipart; use log::info; -use reconcile_text::{BuiltinTokenizer, is_binary, reconcile}; +use reconcile_text::{BuiltinTokenizer, EditedText, reconcile}; use serde::Deserialize; use super::{ - device_id_header::DeviceIdHeader, requests::UpdateDocumentVersion, + device_id_header::DeviceIdHeader, requests::UpdateTextDocumentVersion, responses::DocumentUpdateResponse, }; use crate::{ app_state::{ AppState, - database::models::{DocumentId, StoredDocumentVersion, VaultId}, + database::models::{DocumentId, StoredDocumentVersion, VaultId, VaultUpdateId}, }, config::user_config::User, errors::{SyncServerError, not_found_error, server_error}, + server::requests::UpdateBinaryDocumentVersion, utils::{ - dedup_paths::dedup_paths, is_file_type_mergable::is_file_type_mergable, - normalize::normalize, sanitize_path::sanitize_path, + dedup_paths::dedup_paths, is_binary::is_binary, + is_file_type_mergable::is_file_type_mergable, normalize::normalize, + sanitize_path::sanitize_path, }, }; @@ -30,13 +32,11 @@ use crate::{ pub struct UpdateDocumentPathParams { #[serde(deserialize_with = "normalize")] vault_id: VaultId, - document_id: DocumentId, } #[axum::debug_handler] -#[allow(clippy::too_many_lines)] -pub async fn update_document( +pub async fn update_binary( Path(UpdateDocumentPathParams { vault_id, document_id, @@ -44,25 +44,92 @@ pub async fn update_document( Extension(user): Extension, TypedHeader(device_id): TypedHeader, State(state): State, - TypedMultipart(request): TypedMultipart, + TypedMultipart(request): TypedMultipart, ) -> Result, SyncServerError> { - // No need for a transaction as document versions are immutable - let parent_document = state + let parent_document = get_parent_document(&state, &vault_id, request.parent_version_id).await?; + let content = request.content.contents.to_vec(); + + update_document( + parent_document, + vault_id, + document_id, + user, + device_id, + state, + &request.relative_path, + content, + ) + .await +} + +#[axum::debug_handler] +#[allow(clippy::too_many_lines)] +pub async fn update_text( + Path(UpdateDocumentPathParams { + vault_id, + document_id, + }): Path, + Extension(user): Extension, + TypedHeader(device_id): TypedHeader, + State(state): State, + Json(request): Json, +) -> Result, SyncServerError> { + let parent_document = get_parent_document(&state, &vault_id, request.parent_version_id).await?; + + let edited_text = EditedText::from_diff( + str::from_utf8(&parent_document.content) + .expect("parent must be valid UTF-8 because it's a text document"), + request.content, + &*BuiltinTokenizer::Word, + ); + + let content = edited_text.apply().text().into_bytes(); + + update_document( + parent_document, + vault_id, + document_id, + user, + device_id, + state, + &request.relative_path, + content, + ) + .await +} + +async fn get_parent_document( + state: &AppState, + vault_id: &VaultId, + parent_version_id: VaultUpdateId, +) -> Result { + state .database - .get_document_version(&vault_id, request.parent_version_id, None) + .get_document_version(vault_id, parent_version_id, None) .await .map_err(server_error)? .map_or_else( || { Err(not_found_error(anyhow!( - "Parent version with id `{}` not found", - request.parent_version_id + "Parent version with id `{parent_version_id}` not found" ))) }, Ok, - )?; + ) +} - let sanitized_relative_path = sanitize_path(&request.relative_path); +#[allow(clippy::too_many_lines, clippy::too_many_arguments)] +async fn update_document( + parent_document: StoredDocumentVersion, + vault_id: VaultId, + document_id: DocumentId, + user: User, + device_id: DeviceIdHeader, + state: AppState, + relative_path: &str, + content: Vec, +) -> Result, SyncServerError> { + let sanitized_relative_path = sanitize_path(relative_path); let mut transaction = state .database @@ -102,8 +169,6 @@ pub async fn update_document( ))); } - let content = request.content.contents.to_vec(); - // Return the latest version if the content and path are the same as the latest // version if content == latest_version.content && sanitized_relative_path == latest_version.relative_path diff --git a/sync-server/src/utils.rs b/sync-server/src/utils.rs index b70705f6..7345880d 100644 --- a/sync-server/src/utils.rs +++ b/sync-server/src/utils.rs @@ -1,4 +1,5 @@ pub mod dedup_paths; +pub mod is_binary; pub mod is_file_type_mergable; pub mod normalize; pub mod rotating_file_writer; diff --git a/sync-server/src/utils/is_binary.rs b/sync-server/src/utils/is_binary.rs new file mode 100644 index 00000000..09bfcf94 --- /dev/null +++ b/sync-server/src/utils/is_binary.rs @@ -0,0 +1,26 @@ +/// Heuristically determine if the given data is a binary or a text file's +/// content. +/// +/// Only text inputs can be reconciled using the crate's functions. +#[must_use] +pub fn is_binary(data: &[u8]) -> bool { + if data.contains(&0) { + // Even though the NUL character is valid in UTF-8, it's highly suspicious in + // human-readable text. + return true; + } + + std::str::from_utf8(data).is_err() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_binary() { + assert!(is_binary(&[0, 159, 146, 150])); + assert!(is_binary(&[0, 12])); + assert!(!is_binary(b"hello")); + } +} diff --git a/sync-server/src/utils/rotating_file_writer.rs b/sync-server/src/utils/rotating_file_writer.rs index 9f59c5e5..5bf19b5b 100644 --- a/sync-server/src/utils/rotating_file_writer.rs +++ b/sync-server/src/utils/rotating_file_writer.rs @@ -93,6 +93,26 @@ impl RotatingFileWriter { SystemTime::now() >= inner.next_rotation_time } + fn open_or_create_log_file(inner: &mut RotatingFileWriterInner) -> io::Result<()> { + // If we haven't reached rotation time and there's an existing log file, reuse it + if !Self::should_rotate(inner) + && let Some(latest_file) = + Self::find_latest_log_file(&inner.directory, &inner.file_prefix) + { + let filepath = inner.directory.join(&latest_file); + let file = OpenOptions::new() + .create(true) + .append(true) + .open(&filepath)?; + + inner.current_file = Some(file); + return Ok(()); + } + + // Otherwise, create a new log file with current timestamp + Self::rotate(inner) + } + fn rotate(inner: &mut RotatingFileWriterInner) -> io::Result<()> { let timestamp = Local::now().format("%Y-%m-%d_%H-%M-%S"); let filename = format!("{}.{}.log", inner.file_prefix, timestamp); @@ -114,7 +134,9 @@ impl Write for RotatingFileWriter { fn write(&mut self, buf: &[u8]) -> io::Result { let mut inner = self.inner.lock().unwrap(); - if inner.current_file.is_none() || Self::should_rotate(&inner) { + if inner.current_file.is_none() { + Self::open_or_create_log_file(&mut inner)?; + } else if Self::should_rotate(&inner) { Self::rotate(&mut inner)?; } @@ -328,6 +350,7 @@ mod tests { #[test] fn test_restart_behavior() { let temp_dir = std::env::temp_dir().join("test_restart_behavior"); + let _ = fs::remove_dir_all(&temp_dir); // Create initial writer and write some data {