diff --git a/frontend/deterministic-tests/package.json b/frontend/deterministic-tests/package.json index e1c1b276..4bd82c74 100644 --- a/frontend/deterministic-tests/package.json +++ b/frontend/deterministic-tests/package.json @@ -11,6 +11,7 @@ "test": "npm run build && node dist/cli.js" }, "devDependencies": { + "commander": "^14.0.2", "@types/node": "^25.0.2", "sync-client": "file:../sync-client", "ts-loader": "^9.5.4", diff --git a/frontend/deterministic-tests/src/cli.ts b/frontend/deterministic-tests/src/cli.ts index bb7f5578..6e15cac0 100644 --- a/frontend/deterministic-tests/src/cli.ts +++ b/frontend/deterministic-tests/src/cli.ts @@ -4,7 +4,7 @@ import { ServerManager } from "./server-manager"; import { PrefixedLogger } from "./prefixed-logger"; import { TESTS } from "./test-registry"; import type { TestDefinition, TestResult } from "./test-definition"; -import { parseConcurrency } from "./parse-concurrency"; +import { parseArgs } from "./parse-args"; import { runWithConcurrency } from "./run-with-concurrency"; import { TOKEN, SERVER_BINARY_PATH, CONFIG_PATH } from "./consts"; import * as path from "node:path"; @@ -29,7 +29,10 @@ serverManager.installSignalHandlers(); function testUsesPauseServer(test: TestDefinition): boolean { return test.steps.some( - (step) => step.type === "pause-server" || step.type === "resume-server" + (step) => + step.type === "pause-server" || + step.type === "resume-server" || + step.type === "resume-server-until-history-then-pause" ); } @@ -134,8 +137,7 @@ async function main(): Promise { process.exit(1); } - const filterArg = process.argv.find((a) => a.startsWith("--filter=")); - const filter = filterArg?.slice("--filter=".length); + const { filter, concurrency } = parseArgs(process.argv); const testsToRun: [string, TestDefinition][] = []; for (const [key, test] of Object.entries(TESTS)) { @@ -160,7 +162,6 @@ async function main(): Promise { process.exit(1); } - const concurrency = parseConcurrency(); const regularTests = testsToRun.filter(([, t]) => !testUsesPauseServer(t)); const pauseTests = testsToRun.filter(([, t]) => testUsesPauseServer(t)); diff --git a/frontend/deterministic-tests/src/consts.ts b/frontend/deterministic-tests/src/consts.ts index a0a4b221..d9a2498f 100644 --- a/frontend/deterministic-tests/src/consts.ts +++ b/frontend/deterministic-tests/src/consts.ts @@ -14,3 +14,4 @@ export const WEBSOCKET_POLL_INTERVAL_MS = 50; export const SERVER_READY_POLL_INTERVAL_MS = 100; export const SERVER_READY_MAX_ATTEMPTS = 50; +export const SERVER_START_MAX_ATTEMPTS = 5; diff --git a/frontend/deterministic-tests/src/deterministic-agent.ts b/frontend/deterministic-tests/src/deterministic-agent.ts index 16ee8af8..b32b01c2 100644 --- a/frontend/deterministic-tests/src/deterministic-agent.ts +++ b/frontend/deterministic-tests/src/deterministic-agent.ts @@ -37,15 +37,15 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem { private readonly wsFactory = new ManagedWebSocketFactory(); private nextWriteRename: | { - oldPath: RelativePath; - newPath: RelativePath; - } + oldPath: RelativePath; + newPath: RelativePath; + } | undefined; private nextCreateResponseDrop: | { - dropped: Promise; - resolveDropped: () => void; - } + dropped: Promise; + resolveDropped: () => void; + } | undefined; public constructor( @@ -82,10 +82,10 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem { this.logger(`${prefix} WARN: ${line.message}`); break; case LogLevel.INFO: - this.logger(`${prefix} ${line.message}`); + this.logger(`${prefix} INFO: ${line.message}`); break; case LogLevel.DEBUG: - // Skip debug logs to reduce noise + this.logger(`${prefix} DEBUG: ${line.message}`); break; } }); @@ -271,8 +271,18 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem { this.log(`Cleanup waitUntilFinished failed: ${error}`); } } + // Surface any background sync errors that arrived after the last + // waitForSync (e.g. between the final assert-consistent and here). + // Without this, regressions that fault the engine during the very + // last step of a test would be silently swallowed. + const pendingErrors = this.syncErrors.splice(0); await this.client.destroy(); this.log("Cleanup complete"); + if (pendingErrors.length > 0) { + throw new Error( + `Client ${this.clientId} had ${pendingErrors.length} background sync error(s) during cleanup:\n${pendingErrors.map((e) => e.message).join("\n")}` + ); + } } public override async read(path: RelativePath): Promise { @@ -439,8 +449,6 @@ export class DeterministicAgent extends debugging.InMemoryFileSystem { DeterministicAgent.isCreateDocumentRequest(input, init) ) { this.nextCreateResponseDrop = undefined; - // Release the underlying socket: an unread body keeps the - // undici connection open until GC. try { await response.body?.cancel(); } catch { diff --git a/frontend/deterministic-tests/src/managed-websocket.ts b/frontend/deterministic-tests/src/managed-websocket.ts index 722bc6d3..c759891b 100644 --- a/frontend/deterministic-tests/src/managed-websocket.ts +++ b/frontend/deterministic-tests/src/managed-websocket.ts @@ -139,11 +139,21 @@ class ManagedWebSocket implements WebSocket { } public resume(): void { - this.paused = false; - const messages = this.bufferedMessages.splice(0); - for (const msg of messages) { - this.externalOnMessage?.(msg); + // Drain buffered messages BEFORE flipping `paused` to false. + // If `externalOnMessage` is async (its return type is `unknown`), + // dispatch yields control between buffered messages, and a fresh + // live `ws.onmessage` event firing during that yield would jump + // ahead of unprocessed buffered messages — silently reordering + // events relative to the wire. Keeping `paused = true` during the + // drain forces the live handler to keep buffering, so we splice + // those late arrivals onto the tail and dispatch them in order. + while (this.bufferedMessages.length > 0) { + const messages = this.bufferedMessages.splice(0); + for (const msg of messages) { + this.externalOnMessage?.(msg); + } } + this.paused = false; } public send(data: string | ArrayBufferLike | Blob | ArrayBufferView): void { diff --git a/frontend/deterministic-tests/src/parse-args.ts b/frontend/deterministic-tests/src/parse-args.ts new file mode 100644 index 00000000..11c56f19 --- /dev/null +++ b/frontend/deterministic-tests/src/parse-args.ts @@ -0,0 +1,43 @@ +import * as os from "node:os"; +import { Command, InvalidArgumentError } from "commander"; + +export interface CliArgs { + filter: string | undefined; + concurrency: number; +} + +function parsePositiveInt(value: string): number { + const n = parseInt(value, 10); + if (isNaN(n) || n <= 0) { + throw new InvalidArgumentError("must be a positive integer"); + } + return n; +} + +export function parseArgs(argv: string[]): CliArgs { + const program = new Command(); + + program + .name("deterministic-tests") + .description("Scripted multi-client sync tests against a real server") + .option( + "-f, --filter ", + "Run only tests whose name contains this substring" + ) + .option( + "-j, --concurrency ", + "Number of tests to run in parallel", + parsePositiveInt, + os.cpus().length + ); + + program.parse(argv); + + /* eslint-disable @typescript-eslint/no-unsafe-type-assertion */ + const opts = program.opts(); + const filter = opts.filter as string | undefined; + const concurrency = opts.concurrency as number; + /* eslint-enable @typescript-eslint/no-unsafe-type-assertion */ + + return { filter, concurrency }; +} diff --git a/frontend/deterministic-tests/src/parse-concurrency.ts b/frontend/deterministic-tests/src/parse-concurrency.ts deleted file mode 100644 index f926d1fa..00000000 --- a/frontend/deterministic-tests/src/parse-concurrency.ts +++ /dev/null @@ -1,17 +0,0 @@ -import * as os from "node:os"; - -export function parseConcurrency(): number { - const args = process.argv.slice(2); - for (let i = 0; i < args.length; i++) { - if ( - (args[i] === "--concurrency" || args[i] === "-j") && - i + 1 < args.length - ) { - const n = parseInt(args[i + 1], 10); - if (!isNaN(n) && n > 0) { - return n; - } - } - } - return os.cpus().length; -} diff --git a/frontend/deterministic-tests/src/server-control.ts b/frontend/deterministic-tests/src/server-control.ts index c1d7b2d2..9cb4cde0 100644 --- a/frontend/deterministic-tests/src/server-control.ts +++ b/frontend/deterministic-tests/src/server-control.ts @@ -8,7 +8,8 @@ import type { Logger } from "sync-client"; import { STOP_TIMEOUT_MS, SERVER_READY_POLL_INTERVAL_MS, - SERVER_READY_MAX_ATTEMPTS + SERVER_READY_MAX_ATTEMPTS, + SERVER_START_MAX_ATTEMPTS } from "./consts"; export class ServerControl { @@ -42,10 +43,32 @@ export class ServerControl { throw new Error("Server is already running"); } + // Retry on bind failure: findFreePort closes its probe before we + // spawn, so under heavy parallelism another process can grab the + // same port. Each attempt picks a fresh port. + let lastError: unknown; + for (let attempt = 1; attempt <= SERVER_START_MAX_ATTEMPTS; attempt++) { + try { + await this.startOnce(); + return; + } catch (error) { + lastError = error; + this.logger.warn( + `Server start attempt ${attempt}/${SERVER_START_MAX_ATTEMPTS} failed: ${error instanceof Error ? error.message : String(error)}` + ); + // startOnce already cleaned up its child + tempdir on failure. + } + } + throw new Error( + `Server failed to start after ${SERVER_START_MAX_ATTEMPTS} attempts: ${lastError instanceof Error ? lastError.message : String(lastError)}`, + { cause: lastError instanceof Error ? lastError : undefined } + ); + } + + private async startOnce(): Promise { const reservation = await findFreePort(); this._port = reservation.port; - // Prefer tmpfs (/host/tmp) over disk-backed /tmp for faster SQLite I/O - const tmpBase = fs.existsSync("/host/tmp") ? "/host/tmp" : os.tmpdir(); + const tmpBase = os.tmpdir(); this.tempDir = fs.mkdtempSync(path.join(tmpBase, "vault-link-test-")); const tempConfigPath = path.join(this.tempDir, "config.yml"); const dbDir = path.join(this.tempDir, "databases"); @@ -214,7 +237,36 @@ export class ServerControl { } public isRunning(): boolean { - return this.process?.pid !== undefined; + const proc = this.process; + return ( + proc !== null && + proc.pid !== undefined && + proc.exitCode === null && + proc.signalCode === null + ); + } + + /** + * Synchronously SIGCONT-then-SIGKILL the child process. Safe to call + * from a `process.on("exit", ...)` handler, where async work cannot + * run. Used as a last-resort cleanup so a SIGSTOP'd server doesn't + * outlive the test runner and wedge the next CI invocation. + */ + public forceKillSync(): void { + const proc = this.process; + if (proc?.pid === undefined) { + return; + } + try { + process.kill(proc.pid, "SIGCONT"); + } catch { + // Process may already be gone or never paused. + } + try { + process.kill(proc.pid, "SIGKILL"); + } catch { + // Process already gone. + } } private writeConfigFile(destPath: string, dbDir: string): void { diff --git a/frontend/deterministic-tests/src/server-manager.ts b/frontend/deterministic-tests/src/server-manager.ts index a9697eb0..76c624f7 100644 --- a/frontend/deterministic-tests/src/server-manager.ts +++ b/frontend/deterministic-tests/src/server-manager.ts @@ -55,5 +55,17 @@ export class ServerManager { }) .then(() => process.exit(143)); }); + + // Last-resort synchronous cleanup. Runs even when the process is + // exiting via process.exit() from unhandledRejection / + // uncaughtException — paths where async stopAll() cannot complete. + // SIGSTOP'd servers MUST receive SIGCONT before SIGKILL or the + // kernel keeps them as zombies holding the test's tmpdir, and the + // next CI run can't reuse the port. + process.on("exit", () => { + for (const server of this.activeServers) { + server.forceKillSync(); + } + }); } } diff --git a/frontend/deterministic-tests/src/test-registry.ts b/frontend/deterministic-tests/src/test-registry.ts index 70071121..1a07b411 100644 --- a/frontend/deterministic-tests/src/test-registry.ts +++ b/frontend/deterministic-tests/src/test-registry.ts @@ -36,7 +36,6 @@ import { offlineUpdateBothThenDeleteOneTest } from "./tests/offline-update-both- import { offlineCreateSamePathMergeableTest } from "./tests/offline-create-same-path-mergeable.test"; import { deleteDuringPendingCreateTest } from "./tests/delete-during-pending-create.test"; import { threeClientRenameCreateDeleteTest } from "./tests/three-client-rename-create-delete.test"; -import { keyMigrationEventDropTest } from "./tests/key-migration-event-drop.test"; import { renameToPathOfUnconfirmedDeleteTest } from "./tests/rename-to-path-of-unconfirmed-delete.test"; import { offlineEditThenMoveSameContentTest } from "./tests/offline-edit-then-move-same-content.test"; import { rapidCreateUpdateDeleteCycleTest } from "./tests/rapid-create-update-delete-cycle.test"; @@ -47,10 +46,9 @@ import { offlineMoveThenRemoteDeleteTest } from "./tests/offline-move-then-remot import { resetClearsRecentlyDeletedResurrectionTest } from "./tests/reset-clears-recently-deleted-resurrection.test"; import { moveThenDeleteStalePathTest } from "./tests/move-then-delete-stale-path.test"; import { interruptedDeleteRetryTest } from "./tests/interrupted-delete-retry.test"; -import { updateDoesNotSurvivesRemoteDeleteTest } from "./tests/update-survives-remote-delete.test"; +import { updateDoesNotSurviveRemoteDeleteTest } from "./tests/update-does-not-survive-remote-delete.test"; import { movePreservesRemoteUpdateTest } from "./tests/move-preserves-remote-update.test"; import { recentlyDeletedClearedOnReconnectTest } from "./tests/recently-deleted-cleared-on-reconnect.test"; -import { migrateKeyPreservesExistingTest } from "./tests/migrate-key-preserves-existing.test"; import { watermarkAdvancesOnSkipTest } from "./tests/watermark-advances-on-skip.test"; import { watermarkGapRemoteUpdateNotRecordedTest } from "./tests/watermark-gap-remote-update-not-recorded.test"; import { queueResetLosesCoalescedLocalEditTest } from "./tests/queue-reset-loses-coalesced-local-edit.test"; @@ -147,7 +145,6 @@ export const TESTS: Partial> = { "offline-create-same-path-mergeable": offlineCreateSamePathMergeableTest, "delete-during-pending-create": deleteDuringPendingCreateTest, "three-client-rename-create-delete": threeClientRenameCreateDeleteTest, - "key-migration-event-drop": keyMigrationEventDropTest, "rename-to-path-of-unconfirmed-delete": renameToPathOfUnconfirmedDeleteTest, "offline-edit-then-move-same-content": offlineEditThenMoveSameContentTest, "rapid-create-update-delete-cycle": rapidCreateUpdateDeleteCycleTest, @@ -160,11 +157,10 @@ export const TESTS: Partial> = { "move-then-delete-stale-path": moveThenDeleteStalePathTest, "offline-delete-vs-remote-update": offlineDeleteVsRemoteUpdateTest, "interrupted-delete-retry": interruptedDeleteRetryTest, - "update-survives-remote-delete": updateDoesNotSurvivesRemoteDeleteTest, + "update-does-not-survive-remote-delete": updateDoesNotSurviveRemoteDeleteTest, "move-preserves-remote-update": movePreservesRemoteUpdateTest, "recently-deleted-cleared-on-reconnect": recentlyDeletedClearedOnReconnectTest, - "migrate-key-preserves-existing": migrateKeyPreservesExistingTest, "watermark-advances-on-skip": watermarkAdvancesOnSkipTest, "watermark-gap-remote-update-not-recorded": watermarkGapRemoteUpdateNotRecordedTest, diff --git a/frontend/deterministic-tests/src/tests/coalesce-update-remote-update-data-loss.test.ts b/frontend/deterministic-tests/src/tests/coalesce-update-remote-update-data-loss.test.ts index 69a5ff10..1972526a 100644 --- a/frontend/deterministic-tests/src/tests/coalesce-update-remote-update-data-loss.test.ts +++ b/frontend/deterministic-tests/src/tests/coalesce-update-remote-update-data-loss.test.ts @@ -3,8 +3,14 @@ import type { TestDefinition } from "../test-definition"; export const coalesceUpdateRemoteUpdateDataLossTest: TestDefinition = { description: - "Client 0 edits a file while client 1 is offline. Client 1 reconnects " + - "and immediately edits the same file. Both edits should be preserved.", + "Divergent offline edits with text-merge expectation. Client 0's " + + "remote update fully lands before Client 1 reconnects (`sync`-after " + + "the c0 update enforces this), so Client 1's offline edit merges " + + "against a server-known version, not a coalesced batch. Both " + + "additions must survive in the final merged content. (Filename's " + + "'coalesce' framing is aspirational — a true update-coalesce test " + + "would skip the c0 sync and queue overlapping local + remote " + + "updates against the same parent version.)", clients: 2, steps: [ { diff --git a/frontend/deterministic-tests/src/tests/key-migration-event-drop.test.ts b/frontend/deterministic-tests/src/tests/key-migration-event-drop.test.ts deleted file mode 100644 index cc40e6b0..00000000 --- a/frontend/deterministic-tests/src/tests/key-migration-event-drop.test.ts +++ /dev/null @@ -1,39 +0,0 @@ -import type { AssertableState } from "../utils/assertable-state"; -import type { TestDefinition } from "../test-definition"; - -export const keyMigrationEventDropTest: TestDefinition = { - description: - "Client 0 creates a file and immediately updates it while the server is paused. " + - "After resume, both clients should have the updated content.", - clients: 2, - steps: [ - { type: "enable-sync", client: 0 }, - { type: "enable-sync", client: 1 }, - { type: "barrier" }, - - { type: "pause-server" }, - - { - type: "create", - client: 0, - path: "A.md", - content: "initial content" - }, - { - type: "update", - client: 0, - path: "A.md", - content: "updated content" - }, - - { type: "resume-server" }, - { type: "barrier" }, - - { - type: "assert-consistent", - verify: (s: AssertableState): void => { - s.assertFileCount(1).assertContent("A.md", "updated content"); - } - } - ] -}; diff --git a/frontend/deterministic-tests/src/tests/migrate-key-preserves-existing.test.ts b/frontend/deterministic-tests/src/tests/migrate-key-preserves-existing.test.ts deleted file mode 100644 index bb669e45..00000000 --- a/frontend/deterministic-tests/src/tests/migrate-key-preserves-existing.test.ts +++ /dev/null @@ -1,37 +0,0 @@ -import type { AssertableState } from "../utils/assertable-state"; -import type { TestDefinition } from "../test-definition"; - -export const migrateKeyPreservesExistingTest: TestDefinition = { - description: - "Client 0 creates a file and immediately updates it while the server is paused. " + - "After resume, the update must not be lost.", - clients: 2, - steps: [ - { type: "enable-sync", client: 0 }, - { type: "enable-sync", client: 1 }, - { type: "barrier" }, - - { type: "pause-server" }, - - { type: "create", client: 0, path: "A.md", content: "initial" }, - { - type: "update", - client: 0, - path: "A.md", - content: "updated by client 0" - }, - - { type: "resume-server" }, - { type: "barrier" }, - - { - type: "assert-consistent", - verify: (s: AssertableState): void => { - s.assertFileCount(1).assertContains( - "A.md", - "updated by client 0" - ); - } - } - ] -}; diff --git a/frontend/deterministic-tests/src/tests/update-survives-remote-delete.test.ts b/frontend/deterministic-tests/src/tests/update-does-not-survive-remote-delete.test.ts similarity index 100% rename from frontend/deterministic-tests/src/tests/update-survives-remote-delete.test.ts rename to frontend/deterministic-tests/src/tests/update-does-not-survive-remote-delete.test.ts