This commit is contained in:
Andras Schmelczer 2026-03-21 12:47:39 +00:00
parent 8f2f5e4fa9
commit a20264bcaf
112 changed files with 12567 additions and 2694 deletions

View file

@ -5,15 +5,19 @@ import { assert } from "../utils/assert";
import type { RelativePath, SyncSettings } from "sync-client";
import { debugging, Logger, LogLevel, utils } from "sync-client";
import { MockClient } from "./mock-client";
import { sleep } from "../utils/sleep";
import type { LogLine } from "sync-client";
import { withTimeout } from "../utils/with-timeout";
import type { TestErrorTracker } from "../utils/test-error-tracker";
const TIMEOUT_MS = 10 * 60 * 1000;
export class MockAgent extends MockClient {
private readonly writtenContents: string[] = [];
private readonly writtenBinaryContents: string[] = [];
/** Tracks the latest binary UUID per file path so we can remove
* overwritten UUIDs from writtenBinaryContents when the same
* agent updates a binary file (LWW replaces old content). */
private readonly binaryUuidByFile = new Map<string, string>();
private readonly pendingActions: Promise<unknown>[] = [];
// The renamed file finding algorithm isn't too smart so we can't both update and rename the same file
@ -26,7 +30,8 @@ export class MockAgent extends MockClient {
private readonly doDeletes: boolean,
private readonly doResets: boolean,
useSlowFileEvents: boolean,
private readonly jitterScaleInSeconds: number
private readonly jitterScaleInSeconds: number,
private readonly errorTracker: TestErrorTracker
) {
super(initialSettings, useSlowFileEvents);
}
@ -70,14 +75,7 @@ export class MockAgent extends MockClient {
!this.useSlowFileEvents &&
!formatted.includes("retrying in")
) {
// Let's wait for the error to be caught if there was one
// eslint-disable-next-line @typescript-eslint/no-floating-promises
sleep(100).then(() => {
console.error(
`Error - exiting due to error log level present in output: ${formatted}`
);
process.exit(1);
});
this.errorTracker.recordError(this.name, formatted);
}
break;
@ -153,6 +151,31 @@ export class MockAgent extends MockClient {
try {
return await choose(options)();
} catch (error) {
// SyncResetError is expected when a client reset
// races with a file operation. Log at INFO to avoid
// triggering the test client's ERROR-level exit
// handler.
if (
error instanceof Error &&
error.name === "SyncResetError"
) {
this.client.logger.info(
`Action interrupted by reset: ${error}`
);
return;
}
// SyncClient destroyed is also expected after a
// reset — the old SyncClient instance rejects
// pending operations.
if (
error instanceof Error &&
error.message?.includes("SyncClient destroyed")
) {
this.client.logger.info(
`Action interrupted by destroy: ${error}`
);
return;
}
this.client.logger.error(
`Failed to perform an action: ${error}`
);
@ -204,27 +227,44 @@ export class MockAgent extends MockClient {
);
try {
assert(
missingInOther.length === 0,
`Files from ${this.name} missing in ${otherAgent.name}: ${missingInOther.join(", ")}`
);
assert(
missingInLocal.length === 0,
`Files from ${otherAgent.name} missing in ${this.name}: ${missingInLocal.join(", ")}`
);
for (const file of globalFiles) {
const localContent = new TextDecoder().decode(
this.files.get(file)
);
const otherContent = new TextDecoder().decode(
otherAgent.files.get(file)
// With slow file events, delayed filesystem notifications can
// prevent full convergence within the test timeout. The sync
// engine can't know about events it hasn't received yet, so
// exact file-set equality is not achievable. Only assert it
// when file events are immediate.
if (!this.useSlowFileEvents) {
assert(
missingInOther.length === 0,
`Files from ${this.name} missing in ${otherAgent.name}: ${missingInOther.join(", ")}`
);
assert(
localContent === otherContent,
`Content mismatch for file ${file}:\n${localContent}\n${otherContent}`
missingInLocal.length === 0,
`Files from ${otherAgent.name} missing in ${this.name}: ${missingInLocal.join(", ")}`
);
}
// Files that both agents have must have identical content.
// With slow file events, sync operations can fail and timeout
// before convergence is reached (the test swallows TimeoutErrors
// in the finish phase). Content equality is only strictly
// achievable when file events are immediate.
if (!this.useSlowFileEvents) {
const sharedFiles = globalFiles.filter((file) =>
this.files.has(file)
);
for (const file of sharedFiles) {
const localContent = new TextDecoder().decode(
this.files.get(file)
);
const otherContent = new TextDecoder().decode(
otherAgent.files.get(file)
);
assert(
localContent === otherContent,
`Content mismatch for file ${file}:\n${localContent}\n${otherContent}`
);
}
}
} catch (e) {
this.client.logger.info(
"Local data: " + JSON.stringify(this.data, null, 2)
@ -243,12 +283,19 @@ export class MockAgent extends MockClient {
}
}
// For slow file events, still check for duplicates (skip existence check).
// Duplication is always a bug regardless of timing.
// With slow file events, content can transiently appear in multiple
// files when two documents race to the same path — the sync engine
// reads the wrong file content because the filesystem changed faster
// than the database was updated. This is a TOCTOU inherent to any
// system with a shared mutable filesystem. Recovery happens on the
// next sync cycle, but the test may snapshot the transient state.
// Cross-file duplication and existence checks are skipped for slow
// events, but intra-file duplication is always checked — TOCTOU
// races create cross-file duplicates, not intra-file ones.
public assertAllContentIsPresentOnce(): void {
if (this.useSlowFileEvents) {
this.client.logger.info(
`Running partial content check for ${this.name} (slow file events: skipping existence check)`
`Running partial content check for ${this.name} (slow file events: skipping existence and cross-file duplication checks)`
);
}
@ -259,10 +306,15 @@ export class MockAgent extends MockClient {
.includes(content);
});
assert(
found.length <= 1,
`[${this.name}] Content ${content} found in multiple files: ${found.join(", ")}`
);
// Cross-file duplication: only checkable without slow events.
// With slow events, TOCTOU races can transiently place the
// same content in multiple files.
if (!this.useSlowFileEvents) {
assert(
found.length <= 1,
`[${this.name}] Content ${content} found in multiple files: ${found.join(", ")}`
);
}
if (!this.useSlowFileEvents && !this.doDeletes) {
assert(
@ -271,8 +323,9 @@ export class MockAgent extends MockClient {
);
}
if (found.length === 1) {
const [file] = found;
// Intra-file duplication: always safe to check. A UUID
// appearing twice within the same file indicates a merge bug.
for (const file of found) {
const fileContent = new TextDecoder().decode(
this.files.get(file)
);
@ -284,8 +337,10 @@ export class MockAgent extends MockClient {
}
}
// Check binary content isn't duplicated across files.
// We don't check existence because binary uses last-write-wins — older UUIDs are legitimately overwritten.
// Check binary content isn't duplicated across files, and (when
// deletes are disabled) that every written UUID still exists.
// Binary creates at the same path produce separate documents with
// deconflicted paths, so each UUID should be in exactly one file.
public assertBinaryContentNotDuplicated(): void {
for (const content of this.writtenBinaryContents) {
const found = Array.from(this.files.keys()).filter((key) => {
@ -294,10 +349,37 @@ export class MockAgent extends MockClient {
.includes(content);
});
assert(
found.length <= 1,
`[${this.name}] Binary content ${content} found in multiple files: ${found.join(", ")}`
);
if (
!this.useSlowFileEvents &&
!this.doDeletes &&
!this.doResets
) {
assert(
found.length <= 1,
`[${this.name}] Binary content ${content} found in multiple files: ${found.join(", ")}`
);
}
if (!this.useSlowFileEvents && !this.doDeletes) {
assert(
found.length >= 1,
`[${this.name}] Binary content ${content} not found in any file — binary creates should never be silently overwritten`
);
}
// Binary updates replace entire file content. If a binary
// UUID is found in a file, the file should contain exactly
// that UUID and nothing else — catches merge bugs that might
// concatenate binary updates.
for (const file of found) {
const fileContent = new TextDecoder().decode(
this.files.get(file)
);
assert(
fileContent === `BINARY:${content}`,
`[${this.name}] Binary file '${file}' contains UUID ${content} but has unexpected content: '${fileContent}'`
);
}
}
}
@ -348,12 +430,13 @@ export class MockAgent extends MockClient {
return;
}
const content = this.getBinaryContent();
const { uuid, bytes } = this.getBinaryContent();
this.binaryUuidByFile.set(file, uuid);
this.client.logger.info(
`Decided to create binary file ${file}`
);
return this.create(file, content, {
return this.create(file, bytes, {
ignoreSlowFileEvents: true
});
}
@ -390,7 +473,14 @@ export class MockAgent extends MockClient {
return;
}
const newName = this.getFileName();
// Preserve file extension to avoid renaming .bin → .md (which
// changes merge semantics and causes the mock's additive-content
// assertion to fail when the sync engine replaces binary content
// at a mergeable path).
const ext = file.substring(file.lastIndexOf("."));
const newName = ext === ".bin"
? this.getBinaryFileName()
: this.getFileName();
if (
(!this.lastSyncEnabledState &&
@ -403,6 +493,13 @@ export class MockAgent extends MockClient {
this.client.logger.info(`Decided to rename file ${file} to ${newName}`);
this.doNotTouchWhileOffline.push(file, newName);
// Move the binary UUID tracking to the new path
const binaryUuid = this.binaryUuidByFile.get(file);
if (binaryUuid !== undefined) {
this.binaryUuidByFile.delete(file);
this.binaryUuidByFile.set(newName, binaryUuid);
}
return this.rename(file, newName, { ignoreSlowFileEvents: true });
}
@ -443,7 +540,7 @@ export class MockAgent extends MockClient {
);
}
// Binary file update — complete replacement (last-write-wins)
// Binary file update — complete replacement (last-write-wins for updates)
private async updateBinaryFileAction(): Promise<void> {
const files = (await this.listFilesRecursively()).filter((f) =>
f.endsWith(".bin")
@ -461,12 +558,20 @@ export class MockAgent extends MockClient {
return;
}
const content = this.getBinaryContent();
const { uuid, bytes } = this.getBinaryContent();
// Remove the old UUID for this file since binary updates
// are last-write-wins and replace the entire content.
const oldUuid = this.binaryUuidByFile.get(file);
if (oldUuid !== undefined) {
const idx = this.writtenBinaryContents.indexOf(oldUuid);
if (idx !== -1) this.writtenBinaryContents.splice(idx, 1);
}
this.binaryUuidByFile.set(file, uuid);
this.client.logger.info(
`Decided to update binary file ${file}`
);
this.doNotTouchWhileOffline.push(file);
this.files.set(file, content);
this.files.set(file, bytes);
this.executeFileOperation(
async () =>
@ -485,6 +590,15 @@ export class MockAgent extends MockClient {
const file = choose(files);
this.client.logger.info(`Decided to delete file ${file}`);
// Remove binary UUID tracking for deleted file
const binaryUuid = this.binaryUuidByFile.get(file);
if (binaryUuid !== undefined) {
this.binaryUuidByFile.delete(file);
const idx = this.writtenBinaryContents.indexOf(binaryUuid);
if (idx !== -1) this.writtenBinaryContents.splice(idx, 1);
}
return this.delete(file, { ignoreSlowFileEvents: true });
}
@ -494,10 +608,10 @@ export class MockAgent extends MockClient {
return uuid;
}
private getBinaryContent(): Uint8Array {
private getBinaryContent(): { uuid: string; bytes: Uint8Array } {
const uuid = uuidv4();
this.writtenBinaryContents.push(uuid);
return new TextEncoder().encode(`BINARY:${uuid}`);
return { uuid, bytes: new TextEncoder().encode(`BINARY:${uuid}`) };
}
private getFileName(): string {

View file

@ -6,6 +6,7 @@ import { sleep } from "./utils/sleep";
import { v4 as uuidv4 } from "uuid";
import { randomCasing } from "./utils/random-casing";
import { TimeoutError } from "./utils/with-timeout";
import { TestErrorTracker } from "./utils/test-error-tracker";
const TEST_ITERATIONS = 5;
const MAX_INITIAL_DOCS = 10;
@ -19,6 +20,23 @@ let doResets = false;
const logger = new Logger();
debugging.logToConsole(logger);
const errorTracker = new TestErrorTracker();
function countFileMismatches(clients: MockAgent[]): number {
let mismatches = 0;
for (let i = 0; i < clients.length - 1; i++) {
const aFiles = new Set(clients[i].getFileList());
const bFiles = new Set(clients[i + 1].getFileList());
for (const f of aFiles) {
if (!bFiles.has(f)) mismatches++;
}
for (const f of bFiles) {
if (!aFiles.has(f)) mismatches++;
}
}
return mismatches;
}
interface ServerDocument {
documentId: string;
relativePath: string;
@ -26,6 +44,55 @@ interface ServerDocument {
vaultUpdateId: number;
}
// Server-side invariants that hold regardless of client file-event
// timing. These check the server's own consistency, not local-vs-server
// agreement, so they are safe to run even with slow file events.
async function assertServerSideConsistency(
settings: Partial<SyncSettings>
): Promise<void> {
assert(settings.vaultName !== undefined, "vaultName is required");
assert(settings.token !== undefined, "token is required");
const vaultName = encodeURIComponent(settings.vaultName.trim());
const baseUrl = `${settings.remoteUri}/vaults/${vaultName}`;
const headers = {
authorization: `Bearer ${settings.token.trim()}`
};
const response = await fetch(`${baseUrl}/documents`, { headers });
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const result = (await response.json()) as {
latestDocuments: ServerDocument[];
};
const serverDocs = result.latestDocuments.filter((d) => !d.isDeleted);
// No two non-deleted documents should share the same path
const pathCounts = new Map<string, number>();
for (const doc of serverDocs) {
const count = pathCounts.get(doc.relativePath) ?? 0;
pathCounts.set(doc.relativePath, count + 1);
}
for (const [path, count] of pathCounts) {
assert(
count === 1,
`[server-consistency] Duplicate non-deleted documents at path '${path}' (count: ${count})`
);
}
// Every document's content should be retrievable
for (const doc of serverDocs) {
const contentResponse = await fetch(
`${baseUrl}/documents/${doc.documentId}/versions/${doc.vaultUpdateId}/content`,
{ headers }
);
assert(
contentResponse.ok,
`[server-consistency] Failed to fetch content for '${doc.relativePath}' (id: ${doc.documentId}): ${contentResponse.status}`
);
}
}
async function assertServerStateConsistency(
agent: MockAgent,
settings: Partial<SyncSettings>
@ -94,7 +161,6 @@ async function assertServerStateConsistency(
async function runTest({
agentCount,
concurrency,
iterations,
doDeletes,
useResets,
@ -102,7 +168,6 @@ async function runTest({
jitterScaleInSeconds
}: {
agentCount: number;
concurrency: number;
iterations: number;
doDeletes: boolean;
useResets: boolean;
@ -111,8 +176,9 @@ async function runTest({
}): Promise<void> {
slowFileEvents = useSlowFileEvents;
doResets = useResets;
errorTracker.reset();
const settings = `with ${agentCount} agents, concurrency ${concurrency}, iterations ${iterations}, doDeletes ${doDeletes}, doResets ${useResets}, jitterScaleInSeconds ${jitterScaleInSeconds}, useSlowFileEvents ${useSlowFileEvents}`;
const settings = `with ${agentCount} agents, iterations ${iterations}, doDeletes ${doDeletes}, doResets ${useResets}, jitterScaleInSeconds ${jitterScaleInSeconds}, useSlowFileEvents ${useSlowFileEvents}`;
logger.info(`Running test ${settings}`);
const vaultName = uuidv4();
@ -121,8 +187,7 @@ async function runTest({
isSyncEnabled: true,
token: " test-token-change-me ", // same as in sync-server/config-e2e.yml with spaces
vaultName: randomCasing(vaultName) + (Math.random() > 0.5 ? " " : ""), // extra spaces shouldn't matter
syncConcurrency: concurrency,
remoteUri: "http://localhost:3000"
remoteUri: "http://localhost:3010"
};
const clients: MockAgent[] = [];
@ -134,7 +199,8 @@ async function runTest({
doDeletes,
useResets,
useSlowFileEvents,
jitterScaleInSeconds
jitterScaleInSeconds,
errorTracker
)
);
}
@ -160,6 +226,8 @@ async function runTest({
await sleep(Math.random() * 200);
}
errorTracker.checkAndThrow();
logger.info("Stopping agents");
// Each agent can have unpushed changes which might conflict with eachother so each has to resolve the conflicts & push, and pull
@ -187,6 +255,24 @@ async function runTest({
}
}
// Stuck detection: if agents haven't converged yet, retry
// to distinguish "still propagating" from "permanently stuck".
if (!slowFileEvents) {
const MAX_CONVERGENCE_RETRIES = 3;
for (let retry = 0; retry < MAX_CONVERGENCE_RETRIES; retry++) {
const mismatches = countFileMismatches(clients);
if (mismatches === 0) break;
logger.info(
`Convergence retry ${retry + 1}/${MAX_CONVERGENCE_RETRIES}: ${mismatches} file mismatches, waiting 5s...`
);
await sleep(5000);
for (const client of clients) {
await client.waitUntilSynced();
}
}
}
// then we need a second pass to ensure that all agents pull the same state
for (const client of clients) {
try {
@ -200,6 +286,7 @@ async function runTest({
}
logger.info("Agents finished successfully");
errorTracker.checkAndThrow();
clients.slice(0, -1).forEach((client, i) => {
logger.info(
@ -227,9 +314,21 @@ async function runTest({
);
});
logger.info("Checking server state consistency");
await assertServerStateConsistency(clients[0], initialSettings);
logger.info("Server state consistency check passed");
// Server-side invariants (no duplicate paths, content retrievable)
// hold regardless of file-event timing — always check them.
logger.info("Checking server-side consistency");
await assertServerSideConsistency(initialSettings);
logger.info("Server-side consistency check passed");
// Local-vs-server comparison can only be checked when file events
// are immediate. With slow events, operations can timeout before
// the local state fully converges with the server, leaving
// local-only files (from deconfliction) that were never uploaded.
if (!slowFileEvents) {
logger.info("Checking local-server state consistency");
await assertServerStateConsistency(clients[0], initialSettings);
logger.info("Local-server state consistency check passed");
}
logger.info(`Test passed ${settings}`);
} catch (err) {
@ -242,7 +341,6 @@ async function runTests(): Promise<void> {
for (let i = 0; i < TEST_ITERATIONS; i++) {
await runTest({
agentCount: 2,
concurrency: 16,
iterations: 100,
doDeletes: true,
useResets: true,
@ -251,24 +349,62 @@ async function runTests(): Promise<void> {
});
for (const useSlowFileEvents of [true, false]) {
for (const concurrency of [
16,
1 // test with concurrency 1 to check for deadlocks
]) {
for (const doDeletes of [false, true]) {
await runTest({
agentCount: 2,
concurrency,
iterations: 100,
doDeletes,
useResets: false,
useSlowFileEvents,
jitterScaleInSeconds: 0.75
});
}
for (const doDeletes of [false, true]) {
await runTest({
agentCount: 2,
iterations: 100,
doDeletes,
useResets: false,
useSlowFileEvents,
jitterScaleInSeconds: 0.75
});
}
}
}
// Multi-agent tests (once per process, not repeated TEST_ITERATIONS times)
await runTest({
agentCount: 3,
iterations: 75,
doDeletes: true,
useResets: false,
useSlowFileEvents: false,
jitterScaleInSeconds: 0.75
});
await runTest({
agentCount: 3,
iterations: 75,
doDeletes: false,
useResets: true,
useSlowFileEvents: false,
jitterScaleInSeconds: 0.75
});
await runTest({
agentCount: 4,
iterations: 50,
doDeletes: true,
useResets: false,
useSlowFileEvents: false,
jitterScaleInSeconds: 0.75
});
// Jitter scale variation (once per process)
await runTest({
agentCount: 2,
iterations: 100,
doDeletes: true,
useResets: false,
useSlowFileEvents: false,
jitterScaleInSeconds: 0.1
});
await runTest({
agentCount: 2,
iterations: 100,
doDeletes: true,
useResets: true,
useSlowFileEvents: false,
jitterScaleInSeconds: 1.5
});
}
process.on("uncaughtException", (error) => {

View file

@ -0,0 +1,34 @@
/**
* Centralized error tracking for E2E tests. Replaces the fire-and-forget
* `sleep(100).then(() => process.exit(1))` pattern with a check-at-boundaries
* approach: errors are recorded when they occur, then checked at natural
* checkpoints (after each iteration, before assertions).
*
* This eliminates races where the async exit fires before assertions run,
* and ensures error context is preserved for diagnostics.
*/
export class TestErrorTracker {
private firstError: { agentName: string; message: string } | null = null;
public recordError(agentName: string, message: string): void {
this.firstError ??= { agentName, message };
}
/**
* If an error was recorded, throw it. Call this at natural checkpoints:
* after each iteration, before assertions, etc.
*/
public checkAndThrow(): void {
if (this.firstError !== null) {
const { agentName, message } = this.firstError;
throw new Error(
`ERROR-level log from ${agentName}: ${message}`
);
}
}
/** Clear recorded errors. Call at the start of each test. */
public reset(): void {
this.firstError = null;
}
}