Improve network usage for small text changes (#166)

This commit is contained in:
Andras Schmelczer 2025-11-16 22:10:22 +00:00 committed by GitHub
parent 1da17c462e
commit be1635c26e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 697 additions and 62 deletions

View file

@ -0,0 +1,239 @@
import { describe, it } from "node:test";
import assert from "node:assert";
import { FixedSizeDocumentCache } from "./fix-sized-cache";
// Behavioural tests for FixedSizeDocumentCache. Documents are only a few bytes
// long so the byte budget passed to the constructor makes every expected
// eviction easy to follow. Assertions use strictEqual on purpose: a hit must
// return the very same Uint8Array instance that was stored, and a miss must be
// exactly `undefined` (legacy `assert.equal` would also accept `null`).
describe("fixedSizeDocumentCache", () => {
  // Exceeding the budget evicts the least recently used entry (doc1, because
  // doc2 was read after it).
  it("happyPath", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);
    const doc3 = new Uint8Array([5, 6]);

    cache.put(1, doc1);
    assert.strictEqual(cache.get(1), doc1);

    cache.put(2, doc2);
    assert.strictEqual(cache.get(1), doc1);
    assert.strictEqual(cache.get(2), doc2);

    cache.put(3, doc3);
    assert.strictEqual(cache.get(1), undefined);
    assert.strictEqual(cache.get(2), doc2);
    assert.strictEqual(cache.get(3), doc3);
  });

  // Re-putting a key replaces its value without disturbing other entries.
  it("updateExistingEntry", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1_v1 = new Uint8Array([1, 2]);
    const doc1_v2 = new Uint8Array([3, 4]);
    const doc2 = new Uint8Array([5, 6]);

    cache.put(1, doc1_v1);
    assert.strictEqual(cache.get(1), doc1_v1);

    cache.put(2, doc2);
    assert.strictEqual(cache.get(1), doc1_v1);
    assert.strictEqual(cache.get(2), doc2);

    cache.put(1, doc1_v2); // Update doc1
    assert.strictEqual(cache.get(1), doc1_v2);
    assert.strictEqual(cache.get(2), doc2);
  });

  // Reads refresh recency: doc1 is read last, so doc2 is the one evicted even
  // though doc1 was inserted first.
  it("evictOldestEntry", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);
    const doc3 = new Uint8Array([5, 6]);

    cache.put(1, doc1);
    cache.put(2, doc2);
    assert.strictEqual(cache.get(2), doc2);
    assert.strictEqual(cache.get(1), doc1);

    cache.put(3, doc3);
    assert.strictEqual(cache.get(1), doc1);
    assert.strictEqual(cache.get(2), undefined);
    assert.strictEqual(cache.get(3), doc3);
  });

  // A document larger than the whole budget is never stored.
  it("tooLargeEntry", () => {
    const cache = new FixedSizeDocumentCache(2);
    const doc1 = new Uint8Array([1, 2, 3]);

    cache.put(1, doc1);
    assert.strictEqual(cache.get(1), undefined);
  });

  // One put may need to evict several entries to get back under the budget.
  it("multipleEvictionsInSinglePut", () => {
    const cache = new FixedSizeDocumentCache(10);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);
    const doc3 = new Uint8Array([5, 6]);
    const doc4 = new Uint8Array([7, 8, 9, 10, 11, 12, 13, 14]); // 8 bytes

    cache.put(1, doc1);
    cache.put(2, doc2);
    cache.put(3, doc3);
    // Cache now has 6 bytes total

    cache.put(4, doc4); // Should evict doc1 and doc2 to make room (total: 2+8=10)
    assert.strictEqual(cache.get(1), undefined); // Evicted
    assert.strictEqual(cache.get(2), undefined); // Evicted
    assert.strictEqual(cache.get(3), doc3); // Still present
    assert.strictEqual(cache.get(4), doc4);
  });

  // clear() drops everything and leaves the cache usable.
  it("clearCache", () => {
    const cache = new FixedSizeDocumentCache(10);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);

    cache.put(1, doc1);
    cache.put(2, doc2);
    assert.strictEqual(cache.get(1), doc1);
    assert.strictEqual(cache.get(2), doc2);

    cache.clear();
    assert.strictEqual(cache.get(1), undefined);
    assert.strictEqual(cache.get(2), undefined);

    // Should be able to add entries after clear
    cache.put(3, doc1);
    assert.strictEqual(cache.get(3), doc1);
  });

  // Looking up a key that was never stored is a miss, not an error.
  it("getNonExistentKey", () => {
    const cache = new FixedSizeDocumentCache(10);
    const doc1 = new Uint8Array([1, 2]);

    cache.put(1, doc1);
    assert.strictEqual(cache.get(999), undefined);
  });

  // Growing an existing entry can itself push the cache over budget.
  it("updateEntryWithDifferentSizeTriggeringEviction", () => {
    const cache = new FixedSizeDocumentCache(6);
    const doc1_v1 = new Uint8Array([1, 2]);
    const doc1_v2 = new Uint8Array([1, 2, 3, 4]); // Larger version
    const doc2 = new Uint8Array([5, 6]);
    const doc3 = new Uint8Array([7, 8]);

    cache.put(1, doc1_v1);
    cache.put(2, doc2);
    cache.put(3, doc3);

    // Update doc1 with larger version, should evict doc2
    cache.put(1, doc1_v2);
    assert.strictEqual(cache.get(1), doc1_v2);
    assert.strictEqual(cache.get(2), undefined); // Evicted
    assert.strictEqual(cache.get(3), doc3);
  });

  // With room for exactly one document, every put replaces the previous one.
  it("singleItemCache", () => {
    const cache = new FixedSizeDocumentCache(2);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);

    cache.put(1, doc1);
    assert.strictEqual(cache.get(1), doc1);

    cache.put(2, doc2);
    assert.strictEqual(cache.get(1), undefined); // Evicted
    assert.strictEqual(cache.get(2), doc2);
  });

  // Repeated reads of the most recent entry must not corrupt the ordering.
  it("multipleGetsOnSameEntry", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);
    const doc3 = new Uint8Array([5, 6]);

    cache.put(1, doc1);
    cache.put(2, doc2);

    // Multiple gets on doc1
    cache.get(1);
    cache.get(1);
    cache.get(1);

    // Order should be: 2 (LRU), 1 (MRU)
    cache.put(3, doc3);
    assert.strictEqual(cache.get(1), doc1);
    assert.strictEqual(cache.get(2), undefined); // Evicted
    assert.strictEqual(cache.get(3), doc3);
  });

  // The size limit is inclusive: a document of exactly the budget fits.
  it("exactlySizedEntry", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1 = new Uint8Array([1, 2, 3, 4]); // Exactly cache size

    cache.put(1, doc1);
    assert.strictEqual(cache.get(1), doc1);

    const doc2 = new Uint8Array([5, 6]);
    cache.put(2, doc2);

    // doc1 should be evicted to make room for doc2
    assert.strictEqual(cache.get(1), undefined);
    assert.strictEqual(cache.get(2), doc2);
  });

  // Updating an entry counts as a use and moves it to the MRU position.
  it("updateEntryMakesItMostRecent", () => {
    const cache = new FixedSizeDocumentCache(6);
    const doc1_v1 = new Uint8Array([1, 2]);
    const doc1_v2 = new Uint8Array([3, 4]);
    const doc2 = new Uint8Array([5, 6]);
    const doc3 = new Uint8Array([7, 8]);
    const doc4 = new Uint8Array([9, 10]);

    cache.put(1, doc1_v1);
    cache.put(2, doc2);
    cache.put(3, doc3);

    // Update doc1 (should move it to most recent)
    cache.put(1, doc1_v2);

    // Order should be: 2 (LRU), 3, 1 (MRU)
    // Adding doc4 should evict doc2
    cache.put(4, doc4);
    assert.strictEqual(cache.get(1), doc1_v2);
    assert.strictEqual(cache.get(2), undefined); // Evicted
    assert.strictEqual(cache.get(3), doc3);
    assert.strictEqual(cache.get(4), doc4);
  });

  // Only the last access decides recency, however often entries alternate.
  it("alternatingAccessPattern", () => {
    const cache = new FixedSizeDocumentCache(4);
    const doc1 = new Uint8Array([1, 2]);
    const doc2 = new Uint8Array([3, 4]);
    const doc3 = new Uint8Array([5, 6]);

    cache.put(1, doc1);
    cache.put(2, doc2);

    // Alternate access between doc1 and doc2
    cache.get(1);
    cache.get(2);
    cache.get(1);
    cache.get(2);

    // Order should be: 1, 2 (MRU)
    cache.put(3, doc3);
    assert.strictEqual(cache.get(1), undefined); // Evicted
    assert.strictEqual(cache.get(2), doc2);
    assert.strictEqual(cache.get(3), doc3);
  });

  // Empty documents take no budget, so they survive next to a document that
  // fills the cache completely.
  it("zeroByteDocs", () => {
    const cache = new FixedSizeDocumentCache(2);
    const doc1 = new Uint8Array([]);
    const doc2 = new Uint8Array([]);
    const doc3 = new Uint8Array([1, 2]);

    cache.put(1, doc1);
    cache.put(2, doc2);
    cache.put(3, doc3);

    assert.strictEqual(cache.get(1), doc1);
    assert.strictEqual(cache.get(2), doc2);
    assert.strictEqual(cache.get(3), doc3);
  });
});

View file

@ -0,0 +1,113 @@
// Implements an in-memory fixed-size cache for document contents that evicts the least recently used documents when the size limit is exceeded.
import type { VaultUpdateId } from "../persistence/database";
/**
 * Node of the doubly-linked list that records the recency order of cached
 * documents. Keeping explicit `prev`/`next` links lets the cache unlink or
 * append an entry in O(1).
 */
class LRUNode {
  /**
   * Byte length of `value` captured when the node is created. Size accounting
   * uses this snapshot rather than re-reading `value.byteLength`, so the
   * cache's running total stays consistent even if the underlying buffer is
   * later detached or resized by the caller.
   */
  public readonly sizeInBytes: number;

  public constructor(
    public key: VaultUpdateId,
    public value: Uint8Array,
    public prev: LRUNode | null = null,
    public next: LRUNode | null = null
  ) {
    this.sizeInBytes = value.byteLength;
  }
}

/**
 * In-memory cache for document contents with a fixed byte budget. When the
 * budget is exceeded, the least recently used documents are evicted first.
 *
 * Both `get` and `put` count as a "use" of an entry. Only the byte length of
 * the stored `Uint8Array` views is counted towards the budget; bookkeeping
 * overhead is not. Contents are stored by reference, not copied.
 */
export class FixedSizeDocumentCache {
  private readonly maxSizeInBytes: number;
  private currentSizeInBytes: number;
  private readonly cache: Map<VaultUpdateId, LRUNode>;
  private head: LRUNode | null; // Least recently used
  private tail: LRUNode | null; // Most recently used

  /**
   * @param maxSizeInBytes Upper bound (inclusive) for the summed byte length
   * of all cached documents.
   */
  public constructor(maxSizeInBytes: number) {
    this.maxSizeInBytes = maxSizeInBytes;
    this.currentSizeInBytes = 0;
    this.cache = new Map();
    this.head = null;
    this.tail = null;
  }

  /**
   * Looks up a document and marks it as the most recently used entry.
   *
   * @returns The exact `Uint8Array` instance that was stored, or `undefined`
   * if the key is not cached.
   */
  public get(updateId: VaultUpdateId): Uint8Array | undefined {
    const node = this.cache.get(updateId);
    if (node === undefined) {
      return undefined;
    }

    this.moveToTail(node);
    return node.value;
  }

  /**
   * Stores `content` under `updateId` as the most recently used entry,
   * replacing any previous value for that key and evicting least recently
   * used entries until the cache fits its budget again.
   *
   * Content larger than the whole budget is not stored. Any previous value
   * for the key is still dropped in that case, so the cache never keeps
   * serving content that the caller has superseded.
   */
  public put(updateId: VaultUpdateId, content: Uint8Array): void {
    // Remove the previous version first. Doing this before the size check
    // guarantees that a stale value cannot outlive a rejected replacement.
    const existingNode = this.cache.get(updateId);
    if (existingNode !== undefined) {
      this.evict(existingNode);
    }

    if (content.byteLength > this.maxSizeInBytes) {
      // Document is too large to fit in the cache
      return;
    }

    const newNode = new LRUNode(updateId, content);
    this.cache.set(updateId, newNode);
    this.addToTail(newNode);
    this.currentSizeInBytes += newNode.sizeInBytes;

    // Evict least recently used documents if over size limit. The new node
    // sits at the tail and fits the budget on its own, so the loop stops
    // before reaching it.
    while (this.currentSizeInBytes > this.maxSizeInBytes && this.head !== null) {
      this.evict(this.head);
    }
  }

  /** Removes every entry and resets the size accounting. */
  public clear(): void {
    this.cache.clear();
    this.head = null;
    this.tail = null;
    this.currentSizeInBytes = 0;
  }

  /** Fully removes a node: list links, map entry and size accounting. */
  private evict(node: LRUNode): void {
    this.removeNode(node);
    this.cache.delete(node.key);
    this.currentSizeInBytes -= node.sizeInBytes;
  }

  /** Unlinks a node from the recency list, fixing up `head`/`tail`. */
  private removeNode(node: LRUNode): void {
    if (node.prev) {
      node.prev.next = node.next;
    } else {
      // No predecessor: the node was the head.
      this.head = node.next;
    }

    if (node.next) {
      node.next.prev = node.prev;
    } else {
      // No successor: the node was the tail.
      this.tail = node.prev;
    }

    node.prev = null;
    node.next = null;
  }

  /** Appends an unlinked node at the most-recently-used end of the list. */
  private addToTail(node: LRUNode): void {
    node.prev = this.tail;
    node.next = null;

    if (this.tail) {
      this.tail.next = node;
    }
    this.tail = node;
    // An empty list has no head yet; the first node is both head and tail.
    this.head ??= node;
  }

  /** Marks an already linked node as the most recently used one. */
  private moveToTail(node: LRUNode): void {
    if (node === this.tail) {
      return;
    }

    this.removeNode(node);
    this.addToTail(node);
  }
}

View file

@ -0,0 +1,16 @@
/**
 * Heuristically decides whether `content` is binary rather than text.
 *
 * Content counts as binary when it contains a NUL byte (text is unlikely to
 * contain one) or when it is not well-formed UTF-8.
 *
 * @returns `true` for binary content, `false` when the bytes decode cleanly
 * as UTF-8 and contain no NUL byte (which includes empty content).
 */
export function isBinary(content: Uint8Array): boolean {
  const hasNulByte = content.includes(0);
  if (hasNulByte) {
    return true;
  }

  try {
    // A fatal decoder throws on malformed input instead of silently
    // substituting replacement characters.
    const strictUtf8 = new TextDecoder("utf-8", { fatal: true });
    strictUtf8.decode(content);
    return false;
  } catch {
    return true;
  }
}