// Shared, platform-agnostic wrapper around the generated wasm-bindgen surface.
//
// The actual wasm bindings are injected by a platform-specific entrypoint:
//   - `index.ts` (web/node) instantiates the real WebAssembly module lazily
//     on first use via `initSync`.
//   - `index.rn.ts` (React Native / Hermes) links a wasm2js (pure-JS)
//     implementation, since Hermes does not expose a runtime
//     `WebAssembly` global. See `scripts/build-rn.mjs`.

type WasmModule = typeof import('reconcile-text');

/**
 * The generated wasm-bindgen surface this library wraps, plus a hook to make
 * sure the underlying module is ready. Supplied by a platform entrypoint.
 */
export interface WasmBackend {
  CursorPosition: WasmModule['CursorPosition'];
  TextWithCursors: WasmModule['TextWithCursors'];
  reconcile: WasmModule['reconcile'];
  reconcileWithHistory: WasmModule['reconcileWithHistory'];
  diff: WasmModule['diff'];
  undiff: WasmModule['undiff'];
  /**
   * Make the wasm module ready for use. Invoked before every operation, so it
   * must be cheap and idempotent (a no-op once initialised).
   */
  ensureReady(): void;
}

// Define the enum values as a const array so the runtime check and the
// `BuiltinTokenizer` type are derived from a single list.
const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Markdown', 'Word'] as const;

/**
 * Tokenisation strategies for text merging.
 *
 * These correspond to the built-in tokenizers available in the underlying WASM module.
 */
export type BuiltinTokenizer = (typeof BUILTIN_TOKENIZERS)[number];

/**
 * History classification for text spans in merge results.
 *
 * Indicates the origin of each text span in the merged document.
 */
export type History =
  | 'Unchanged'
  | 'AddedFromLeft'
  | 'AddedFromRight'
  | 'RemovedFromLeft'
  | 'RemovedFromRight';

/**
 * Represents a text document with associated cursor positions.
 *
 * This interface is used both as input to reconcile functions (to specify where
 * cursors are positioned in the original documents) and as output (with cursors
 * automatically repositioned after merging).
 */
export interface TextWithCursors {
  /** The document's entire content as a string */
  text: string;

  /**
   * Array of cursor positions within the text. Can be empty if there are no cursors to track.
   * Each cursor has a unique ID and position.
   */
  cursors: CursorPosition[];
}

/**
 * Like `TextWithCursors`, but cursors may be null or undefined (treated as empty).
 * Used as input where cursor tracking is optional.
 */
export interface TextWithOptionalCursors {
  /** The document's entire content as a string */
  text: string;

  /**
   * Array of cursor positions within the text. Can be null, undefined, or empty
   * if there are no cursors to track. Each cursor has a unique ID and position.
   */
  cursors: null | undefined | CursorPosition[];
}

/**
 * Represents a cursor position within a text document.
 *
 * Cursors are automatically repositioned during text merging to maintain their
 * relative positions as text is inserted, deleted, or modified around them.
 */
export interface CursorPosition {
  /** Unique identifier for the cursor (can be any number, must be unique within the document) */
  id: number;

  /** Character position in the text, 0-based index from the beginning of the document */
  position: number;
}

/**
 * Represents a merged text document with cursor positions and detailed change history.
 *
 * This is the return type of `reconcileWithHistory()` and provides complete information
 * about how the merge was performed, including which parts of the final text came from
 * which source documents.
 */
export interface TextWithCursorsAndHistory {
  /** The merged document's entire content */
  text: string;

  /**
   * Array of cursor positions within the merged text. Can be empty if there are no cursors to track.
   * All cursors are automatically repositioned from the left and right documents.
   */
  cursors: CursorPosition[];

  /**
   * Detailed provenance information showing the origin of each text span in the result.
   * Each span indicates whether it was unchanged, added from left, added from right, etc.
   */
  history: SpanWithHistory[];
}

/**
 * Represents a span of text in the merged result with its change history.
 *
 * This shows exactly which source document contributed each piece of text to the
 * final merged result. Useful for understanding merge decisions and creating
 * visualisations of how documents were combined.
 */
export interface SpanWithHistory {
  /** The text content of this span */
  text: string;

  /** The origin of this text span in the merge result */
  history: History;
}

/** The public, synchronous API surface, identical across platforms. */
export interface ReconcileApi {
  /**
   * Merges three versions of text using intelligent conflict resolution.
   *
   * This is the primary function for 3-way text merging. Unlike traditional merge tools
   * that produce conflict markers, this function automatically resolves conflicts by
   * applying both sets of changes where possible.
   *
   * @param original - The original/base version of the text that both sides diverged from
   * @param left - The left version of the text (either string or TextWithCursors with cursor positions)
   * @param right - The right version of the text (either string or TextWithCursors with cursor positions)
   * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
   *                    "Character" (fine-grained), or "Line" (similar to git merge);
   *                    "Markdown" is also accepted
   * @returns The reconciled text with automatically repositioned cursor positions
   * @throws Error if `tokenizer` is not one of the built-in tokenizers
   *
   * @example
   * ```typescript
   * const original = "Hello world";
   * const left = "Hello beautiful world";  // Added "beautiful"
   * const right = "Hi world";              // Changed "Hello" to "Hi"
   *
   * const result = reconcile(original, left, right);
   * console.log(result.text); // "Hi beautiful world"
   * ```
   */
  reconcile(
    original: string,
    left: string | TextWithOptionalCursors,
    right: string | TextWithOptionalCursors,
    tokenizer?: BuiltinTokenizer
  ): TextWithCursors;

  /**
   * Generates a compact diff representation between an original and changed text.
   *
   * These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
   * Cursor positions are omitted from the diff result.
   *
   * This function computes the differences between two versions of text and returns
   * a compact representation of those changes.
   *
   * @param original - The original/base version of the text
   * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
   * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
   * @returns An array of inserts (strings), deletes (negative integers), and retained spans (positive integers).
   * @throws Error if `tokenizer` is not one of the built-in tokenizers
   */
  diff(
    original: string,
    changed: string | TextWithOptionalCursors,
    tokenizer?: BuiltinTokenizer
  ): Array<string | number>;

  /**
   * Applies a compact diff to an original text to reconstruct the changed version.
   *
   * This function takes an original text and a compact diff representation (as produced
   * by the `diff` function) and reconstructs the modified text.
   *
   * @param original - The original/base version of the text
   * @param diff - The compact diff array (inserts as strings, deletes as negative integers, retained spans as positive integers)
   * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
   * @returns The reconstructed changed text as a string.
   * @throws Error if `tokenizer` is not one of the built-in tokenizers
   */
  undiff(
    original: string,
    diff: Array<string | number>,
    tokenizer?: BuiltinTokenizer
  ): string;

  /**
   * Merges three versions of text and returns detailed provenance information.
   *
   * This function behaves like `reconcile()` but also provides
   * detailed historical information about the origin of each text span in the result.
   * This is valuable for understanding how the merge was performed and which changes
   * came from which source.
   *
   * Note: Computing the history is computationally more expensive than the basic merge.
   *
   * @param original - The original/base version of the text that both sides diverged from
   * @param left - The left version of the text (either string or TextWithCursors with cursor positions)
   * @param right - The right version of the text (either string or TextWithCursors with cursor positions)
   * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
   *                    "Character" (fine-grained), or "Line" (similar to git merge);
   *                    "Markdown" is also accepted
   * @returns The reconciled text with cursor positions and detailed change history
   * @throws Error if `tokenizer` is not one of the built-in tokenizers
   *
   * @example
   * ```typescript
   * const original = "Hello world";
   * const left = "Hello beautiful world";
   * const right = "Hi world";
   *
   * const result = reconcileWithHistory(original, left, right);
   * console.log(result.text);    // "Hi beautiful world"
   * console.log(result.history); // Array of SpanWithHistory objects showing change origins
   * ```
   */
  reconcileWithHistory(
    original: string,
    left: string | TextWithOptionalCursors,
    right: string | TextWithOptionalCursors,
    tokenizer?: BuiltinTokenizer
  ): TextWithCursorsAndHistory;
}

const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer, only ${BUILTIN_TOKENIZERS.join(
  ', '
)} are supported`;

/**
 * Run `use` with a wasm-side object and always call its `free()` afterwards,
 * whether `use` returns or throws, so handles are not leaked on error paths.
 *
 * @param resource - Any object exposing a `free()` method
 * @param use - Callback that reads from the resource while it is still alive
 * @returns Whatever `use` returns
 */
function withFreed<R extends { free(): void }, T>(resource: R, use: (resource: R) => T): T {
  try {
    return use(resource);
  } finally {
    resource.free();
  }
}

/**
 * Build the public {@link ReconcileApi} on top of a {@link WasmBackend}.
 *
 * Each operation calls `backend.ensureReady()` first, validates the tokenizer,
 * then marshals JS values into the wasm representation, invokes the binding,
 * and frees the wasm-side objects it created or received, including when the
 * binding or a conversion step throws. The wrapper itself does not branch on
 * which kind of backend was supplied.
 *
 * @param backend - The wasm-bindgen surface plus readiness hook to wrap
 * @returns An object exposing `reconcile`, `diff`, `undiff` and `reconcileWithHistory`
 */
export function makeReconcileApi(backend: WasmBackend): ReconcileApi {
  /** Reject tokenizer names that are not in `BUILTIN_TOKENIZERS` (guards untyped JS callers). */
  function assertTokenizer(tokenizer: BuiltinTokenizer): void {
    if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
      throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
    }
  }

  /**
   * Convert a plain string or a `{ text, cursors }` object into the backend's
   * `TextWithCursors`. Missing (null/undefined) cursors are treated as an empty list.
   * The caller owns the returned object and must `free()` it.
   */
  function toWasmTextWithCursors(text: string | TextWithOptionalCursors) {
    const isInputString = typeof text === 'string';
    const innerText = isInputString ? text : text.text;
    const innerCursors = isInputString ? [] : (text.cursors ?? []);

    return new backend.TextWithCursors(
      innerText,
      innerCursors.map(({ id, position }) => new backend.CursorPosition(id, position))
    );
  }

  /**
   * Copy a backend text-with-cursors result into a plain JS object. Every
   * cursor handle returned by `cursors()` is freed once read; the container
   * itself is left for the caller to free.
   */
  function toTextWithCursors(textWithCursor: {
    text(): string;
    cursors(): Array<{ id(): number; characterIndex(): number; free(): void }>;
  }): TextWithCursors {
    const cursors = textWithCursor.cursors().map((wasmCursor) =>
      withFreed(wasmCursor, (cursor) => ({
        id: cursor.id(),
        position: cursor.characterIndex(),
      }))
    );

    return {
      text: textWithCursor.text(),
      cursors,
    };
  }

  /** Copy a backend history span into a plain JS object and free the span. */
  function toSpanWithHistory(span: {
    text(): string;
    history(): History;
    free(): void;
  }): SpanWithHistory {
    return withFreed(span, (wasmSpan) => ({
      text: wasmSpan.text(),
      history: wasmSpan.history(),
    }));
  }

  function reconcile(
    original: string,
    left: string | TextWithOptionalCursors,
    right: string | TextWithOptionalCursors,
    tokenizer: BuiltinTokenizer = 'Word'
  ): TextWithCursors {
    backend.ensureReady();
    assertTokenizer(tokenizer);

    // Nested scopes so that each wasm object is released even if a later
    // allocation, the merge itself, or the result conversion throws.
    return withFreed(toWasmTextWithCursors(left), (leftWasm) =>
      withFreed(toWasmTextWithCursors(right), (rightWasm) =>
        withFreed(backend.reconcile(original, leftWasm, rightWasm, tokenizer), toTextWithCursors)
      )
    );
  }

  function diff(
    original: string,
    changed: string | TextWithOptionalCursors,
    tokenizer: BuiltinTokenizer = 'Word'
  ): Array<string | number> {
    backend.ensureReady();
    assertTokenizer(tokenizer);

    const result = withFreed(toWasmTextWithCursors(changed), (changedWasm) =>
      backend.diff(original, changedWasm, tokenizer)
    );

    // The binding may hand back bigint entries; expose plain numbers instead.
    return result.map((item) => (typeof item === 'bigint' ? Number(item) : item));
  }

  function undiff(
    original: string,
    diffValue: Array<string | number>,
    tokenizer: BuiltinTokenizer = 'Word'
  ): string {
    backend.ensureReady();
    assertTokenizer(tokenizer);

    return backend.undiff(original, diffValue, tokenizer);
  }

  function reconcileWithHistory(
    original: string,
    left: string | TextWithOptionalCursors,
    right: string | TextWithOptionalCursors,
    tokenizer: BuiltinTokenizer = 'Word'
  ): TextWithCursorsAndHistory {
    backend.ensureReady();
    assertTokenizer(tokenizer);

    // Same release discipline as `reconcile`, plus the per-span history copy.
    return withFreed(toWasmTextWithCursors(left), (leftWasm) =>
      withFreed(toWasmTextWithCursors(right), (rightWasm) =>
        withFreed(
          backend.reconcileWithHistory(original, leftWasm, rightWasm, tokenizer),
          (result) => {
            const jsResult = toTextWithCursors(result);
            const history = result.history().map(toSpanWithHistory);
            return { ...jsResult, history };
          }
        )
      )
    );
  }

  return { reconcile, diff, undiff, reconcileWithHistory };
}