Improve tests
Some checks failed
Check / build (pull_request) Has been cancelled
E2E tests / build (pull_request) Has been cancelled
Publish CLI / publish-docker (pull_request) Has been cancelled
Publish server Docker image / publish-docker (pull_request) Has been cancelled

This commit is contained in:
Andras Schmelczer 2026-05-09 10:14:29 +01:00
parent 20e1c3f22d
commit 9e81343ab1
14 changed files with 161 additions and 124 deletions

View file

@ -11,6 +11,7 @@
"test": "npm run build && node dist/cli.js"
},
"devDependencies": {
"commander": "^14.0.2",
"@types/node": "^25.0.2",
"sync-client": "file:../sync-client",
"ts-loader": "^9.5.4",

View file

@ -4,7 +4,7 @@ import { ServerManager } from "./server-manager";
import { PrefixedLogger } from "./prefixed-logger";
import { TESTS } from "./test-registry";
import type { TestDefinition, TestResult } from "./test-definition";
import { parseConcurrency } from "./parse-concurrency";
import { parseArgs } from "./parse-args";
import { runWithConcurrency } from "./run-with-concurrency";
import { TOKEN, SERVER_BINARY_PATH, CONFIG_PATH } from "./consts";
import * as path from "node:path";
@ -29,7 +29,10 @@ serverManager.installSignalHandlers();
function testUsesPauseServer(test: TestDefinition): boolean {
return test.steps.some(
(step) => step.type === "pause-server" || step.type === "resume-server"
(step) =>
step.type === "pause-server" ||
step.type === "resume-server" ||
step.type === "resume-server-until-history-then-pause"
);
}
@ -134,8 +137,7 @@ async function main(): Promise<void> {
process.exit(1);
}
const filterArg = process.argv.find((a) => a.startsWith("--filter="));
const filter = filterArg?.slice("--filter=".length);
const { filter, concurrency } = parseArgs(process.argv);
const testsToRun: [string, TestDefinition][] = [];
for (const [key, test] of Object.entries(TESTS)) {
@ -160,7 +162,6 @@ async function main(): Promise<void> {
process.exit(1);
}
const concurrency = parseConcurrency();
const regularTests = testsToRun.filter(([, t]) => !testUsesPauseServer(t));
const pauseTests = testsToRun.filter(([, t]) => testUsesPauseServer(t));

View file

@ -14,3 +14,4 @@ export const WEBSOCKET_POLL_INTERVAL_MS = 50;
export const SERVER_READY_POLL_INTERVAL_MS = 100;
export const SERVER_READY_MAX_ATTEMPTS = 50;
export const SERVER_START_MAX_ATTEMPTS = 5;

View file

@ -82,10 +82,10 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem {
this.logger(`${prefix} WARN: ${line.message}`);
break;
case LogLevel.INFO:
this.logger(`${prefix} ${line.message}`);
this.logger(`${prefix} INFO: ${line.message}`);
break;
case LogLevel.DEBUG:
// Skip debug logs to reduce noise
this.logger(`${prefix} DEBUG: ${line.message}`);
break;
}
});
@ -271,8 +271,18 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem {
this.log(`Cleanup waitUntilFinished failed: ${error}`);
}
}
// Surface any background sync errors that arrived after the last
// waitForSync (e.g. between the final assert-consistent and here).
// Without this, regressions that fault the engine during the very
// last step of a test would be silently swallowed.
const pendingErrors = this.syncErrors.splice(0);
await this.client.destroy();
this.log("Cleanup complete");
if (pendingErrors.length > 0) {
throw new Error(
`Client ${this.clientId} had ${pendingErrors.length} background sync error(s) during cleanup:\n${pendingErrors.map((e) => e.message).join("\n")}`
);
}
}
public override async read(path: RelativePath): Promise<Uint8Array> {
@ -439,8 +449,6 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem {
DeterministicAgent.isCreateDocumentRequest(input, init)
) {
this.nextCreateResponseDrop = undefined;
// Release the underlying socket: an unread body keeps the
// undici connection open until GC.
try {
await response.body?.cancel();
} catch {

View file

@ -139,12 +139,22 @@ class ManagedWebSocket implements WebSocket {
}
public resume(): void {
this.paused = false;
// Drain buffered messages BEFORE flipping `paused` to false.
// If `externalOnMessage` is async (its return type is `unknown`),
// dispatch yields control between buffered messages, and a fresh
// live `ws.onmessage` event firing during that yield would jump
// ahead of unprocessed buffered messages — silently reordering
// events relative to the wire. Keeping `paused = true` during the
// drain forces the live handler to keep buffering, so we splice
// those late arrivals onto the tail and dispatch them in order.
while (this.bufferedMessages.length > 0) {
const messages = this.bufferedMessages.splice(0);
for (const msg of messages) {
this.externalOnMessage?.(msg);
}
}
this.paused = false;
}
public send(data: string | ArrayBufferLike | Blob | ArrayBufferView): void {
this.ws.send(data);

View file

@ -0,0 +1,43 @@
import * as os from "node:os";
import { Command, InvalidArgumentError } from "commander";
export interface CliArgs {
filter: string | undefined;
concurrency: number;
}
function parsePositiveInt(value: string): number {
const n = parseInt(value, 10);
if (isNaN(n) || n <= 0) {
throw new InvalidArgumentError("must be a positive integer");
}
return n;
}
export function parseArgs(argv: string[]): CliArgs {
const program = new Command();
program
.name("deterministic-tests")
.description("Scripted multi-client sync tests against a real server")
.option(
"-f, --filter <substring>",
"Run only tests whose name contains this substring"
)
.option(
"-j, --concurrency <number>",
"Number of tests to run in parallel",
parsePositiveInt,
os.cpus().length
);
program.parse(argv);
/* eslint-disable @typescript-eslint/no-unsafe-type-assertion */
const opts = program.opts();
const filter = opts.filter as string | undefined;
const concurrency = opts.concurrency as number;
/* eslint-enable @typescript-eslint/no-unsafe-type-assertion */
return { filter, concurrency };
}

View file

@ -1,17 +0,0 @@
import * as os from "node:os";
export function parseConcurrency(): number {
const args = process.argv.slice(2);
for (let i = 0; i < args.length; i++) {
if (
(args[i] === "--concurrency" || args[i] === "-j") &&
i + 1 < args.length
) {
const n = parseInt(args[i + 1], 10);
if (!isNaN(n) && n > 0) {
return n;
}
}
}
return os.cpus().length;
}

View file

@ -8,7 +8,8 @@ import type { Logger } from "sync-client";
import {
STOP_TIMEOUT_MS,
SERVER_READY_POLL_INTERVAL_MS,
SERVER_READY_MAX_ATTEMPTS
SERVER_READY_MAX_ATTEMPTS,
SERVER_START_MAX_ATTEMPTS
} from "./consts";
export class ServerControl {
@ -42,10 +43,32 @@ export class ServerControl {
throw new Error("Server is already running");
}
// Retry on bind failure: findFreePort closes its probe before we
// spawn, so under heavy parallelism another process can grab the
// same port. Each attempt picks a fresh port.
let lastError: unknown;
for (let attempt = 1; attempt <= SERVER_START_MAX_ATTEMPTS; attempt++) {
try {
await this.startOnce();
return;
} catch (error) {
lastError = error;
this.logger.warn(
`Server start attempt ${attempt}/${SERVER_START_MAX_ATTEMPTS} failed: ${error instanceof Error ? error.message : String(error)}`
);
// startOnce already cleaned up its child + tempdir on failure.
}
}
throw new Error(
`Server failed to start after ${SERVER_START_MAX_ATTEMPTS} attempts: ${lastError instanceof Error ? lastError.message : String(lastError)}`,
{ cause: lastError instanceof Error ? lastError : undefined }
);
}
private async startOnce(): Promise<void> {
const reservation = await findFreePort();
this._port = reservation.port;
// Prefer tmpfs (/host/tmp) over disk-backed /tmp for faster SQLite I/O
const tmpBase = fs.existsSync("/host/tmp") ? "/host/tmp" : os.tmpdir();
const tmpBase = os.tmpdir();
this.tempDir = fs.mkdtempSync(path.join(tmpBase, "vault-link-test-"));
const tempConfigPath = path.join(this.tempDir, "config.yml");
const dbDir = path.join(this.tempDir, "databases");
@ -214,7 +237,36 @@ export class ServerControl {
}
public isRunning(): boolean {
return this.process?.pid !== undefined;
const proc = this.process;
return (
proc !== null &&
proc.pid !== undefined &&
proc.exitCode === null &&
proc.signalCode === null
);
}
/**
* Synchronously SIGCONT-then-SIGKILL the child process. Safe to call
* from a `process.on("exit", ...)` handler, where async work cannot
* run. Used as a last-resort cleanup so a SIGSTOP'd server doesn't
* outlive the test runner and wedge the next CI invocation.
*/
public forceKillSync(): void {
const proc = this.process;
if (proc?.pid === undefined) {
return;
}
try {
process.kill(proc.pid, "SIGCONT");
} catch {
// Process may already be gone or never paused.
}
try {
process.kill(proc.pid, "SIGKILL");
} catch {
// Process already gone.
}
}
private writeConfigFile(destPath: string, dbDir: string): void {

View file

@ -55,5 +55,17 @@ export class ServerManager {
})
.then(() => process.exit(143));
});
// Last-resort synchronous cleanup. Runs even when the process is
// exiting via process.exit() from unhandledRejection /
// uncaughtException — paths where async stopAll() cannot complete.
// SIGSTOP'd servers MUST receive SIGCONT before SIGKILL or the
// kernel keeps them as zombies holding the test's tmpdir, and the
// next CI run can't reuse the port.
process.on("exit", () => {
for (const server of this.activeServers) {
server.forceKillSync();
}
});
}
}

View file

@ -36,7 +36,6 @@ import { offlineUpdateBothThenDeleteOneTest } from "./tests/offline-update-both-
import { offlineCreateSamePathMergeableTest } from "./tests/offline-create-same-path-mergeable.test";
import { deleteDuringPendingCreateTest } from "./tests/delete-during-pending-create.test";
import { threeClientRenameCreateDeleteTest } from "./tests/three-client-rename-create-delete.test";
import { keyMigrationEventDropTest } from "./tests/key-migration-event-drop.test";
import { renameToPathOfUnconfirmedDeleteTest } from "./tests/rename-to-path-of-unconfirmed-delete.test";
import { offlineEditThenMoveSameContentTest } from "./tests/offline-edit-then-move-same-content.test";
import { rapidCreateUpdateDeleteCycleTest } from "./tests/rapid-create-update-delete-cycle.test";
@ -47,10 +46,9 @@ import { offlineMoveThenRemoteDeleteTest } from "./tests/offline-move-then-remot
import { resetClearsRecentlyDeletedResurrectionTest } from "./tests/reset-clears-recently-deleted-resurrection.test";
import { moveThenDeleteStalePathTest } from "./tests/move-then-delete-stale-path.test";
import { interruptedDeleteRetryTest } from "./tests/interrupted-delete-retry.test";
import { updateDoesNotSurvivesRemoteDeleteTest } from "./tests/update-survives-remote-delete.test";
import { updateDoesNotSurviveRemoteDeleteTest } from "./tests/update-does-not-survive-remote-delete.test";
import { movePreservesRemoteUpdateTest } from "./tests/move-preserves-remote-update.test";
import { recentlyDeletedClearedOnReconnectTest } from "./tests/recently-deleted-cleared-on-reconnect.test";
import { migrateKeyPreservesExistingTest } from "./tests/migrate-key-preserves-existing.test";
import { watermarkAdvancesOnSkipTest } from "./tests/watermark-advances-on-skip.test";
import { watermarkGapRemoteUpdateNotRecordedTest } from "./tests/watermark-gap-remote-update-not-recorded.test";
import { queueResetLosesCoalescedLocalEditTest } from "./tests/queue-reset-loses-coalesced-local-edit.test";
@ -147,7 +145,6 @@ export const TESTS: Partial<Record<string, TestDefinition>> = {
"offline-create-same-path-mergeable": offlineCreateSamePathMergeableTest,
"delete-during-pending-create": deleteDuringPendingCreateTest,
"three-client-rename-create-delete": threeClientRenameCreateDeleteTest,
"key-migration-event-drop": keyMigrationEventDropTest,
"rename-to-path-of-unconfirmed-delete": renameToPathOfUnconfirmedDeleteTest,
"offline-edit-then-move-same-content": offlineEditThenMoveSameContentTest,
"rapid-create-update-delete-cycle": rapidCreateUpdateDeleteCycleTest,
@ -160,11 +157,10 @@ export const TESTS: Partial<Record<string, TestDefinition>> = {
"move-then-delete-stale-path": moveThenDeleteStalePathTest,
"offline-delete-vs-remote-update": offlineDeleteVsRemoteUpdateTest,
"interrupted-delete-retry": interruptedDeleteRetryTest,
"update-survives-remote-delete": updateDoesNotSurvivesRemoteDeleteTest,
"update-does-not-survive-remote-delete": updateDoesNotSurviveRemoteDeleteTest,
"move-preserves-remote-update": movePreservesRemoteUpdateTest,
"recently-deleted-cleared-on-reconnect":
recentlyDeletedClearedOnReconnectTest,
"migrate-key-preserves-existing": migrateKeyPreservesExistingTest,
"watermark-advances-on-skip": watermarkAdvancesOnSkipTest,
"watermark-gap-remote-update-not-recorded":
watermarkGapRemoteUpdateNotRecordedTest,

View file

@ -3,8 +3,14 @@ import type { TestDefinition } from "../test-definition";
export const coalesceUpdateRemoteUpdateDataLossTest: TestDefinition = {
description:
"Client 0 edits a file while client 1 is offline. Client 1 reconnects " +
"and immediately edits the same file. Both edits should be preserved.",
"Divergent offline edits with text-merge expectation. Client 0's " +
"remote update fully lands before Client 1 reconnects (`sync`-after " +
"the c0 update enforces this), so Client 1's offline edit merges " +
"against a server-known version, not a coalesced batch. Both " +
"additions must survive in the final merged content. (Filename's " +
"'coalesce' framing is aspirational — a true update-coalesce test " +
"would skip the c0 sync and queue overlapping local + remote " +
"updates against the same parent version.)",
clients: 2,
steps: [
{

View file

@ -1,39 +0,0 @@
import type { AssertableState } from "../utils/assertable-state";
import type { TestDefinition } from "../test-definition";
export const keyMigrationEventDropTest: TestDefinition = {
description:
"Client 0 creates a file and immediately updates it while the server is paused. " +
"After resume, both clients should have the updated content.",
clients: 2,
steps: [
{ type: "enable-sync", client: 0 },
{ type: "enable-sync", client: 1 },
{ type: "barrier" },
{ type: "pause-server" },
{
type: "create",
client: 0,
path: "A.md",
content: "initial content"
},
{
type: "update",
client: 0,
path: "A.md",
content: "updated content"
},
{ type: "resume-server" },
{ type: "barrier" },
{
type: "assert-consistent",
verify: (s: AssertableState): void => {
s.assertFileCount(1).assertContent("A.md", "updated content");
}
}
]
};

View file

@ -1,37 +0,0 @@
import type { AssertableState } from "../utils/assertable-state";
import type { TestDefinition } from "../test-definition";
export const migrateKeyPreservesExistingTest: TestDefinition = {
description:
"Client 0 creates a file and immediately updates it while the server is paused. " +
"After resume, the update must not be lost.",
clients: 2,
steps: [
{ type: "enable-sync", client: 0 },
{ type: "enable-sync", client: 1 },
{ type: "barrier" },
{ type: "pause-server" },
{ type: "create", client: 0, path: "A.md", content: "initial" },
{
type: "update",
client: 0,
path: "A.md",
content: "updated by client 0"
},
{ type: "resume-server" },
{ type: "barrier" },
{
type: "assert-consistent",
verify: (s: AssertableState): void => {
s.assertFileCount(1).assertContains(
"A.md",
"updated by client 0"
);
}
}
]
};