reconcile/reconcile-js/src/index.ts
Andras Schmelczer e85eb485e8
Improve compact diff API (#24)
* Remove is_binary from API

* Format

* Rename file

* Test with more feature combinations

* Don't depend on serde for wasm

* Fix lint & tests

* Don't unwrap to MAX number

* Expose undiff to JS

* Add undiff tests

* Lint

* Change name
2025-11-16 15:43:19 +00:00

346 lines
11 KiB
TypeScript

import {
CursorPosition as wasmCursorPosition,
reconcile as wasmReconcile,
TextWithCursors as wasmTextWithCursors,
SpanWithHistory as wasmSpanWithHistory,
reconcileWithHistory as wasmReconcileWithHistory,
diff as wasmDiff,
undiff as wasmUndiff,
initSync,
} from 'reconcile-text';
import wasmBytes from 'reconcile-text/reconcile_text_bg.wasm';
// Define the enum values as const arrays to avoid duplication
const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Word'] as const;
const HISTORY_VALUES = [
'Unchanged',
'AddedFromLeft',
'AddedFromRight',
'RemovedFromLeft',
'RemovedFromRight',
] as const;
/**
* Tokenisation strategies for text merging.
*
* These correspond to the built-in tokenizers available in the underlying WASM module.
*/
export type BuiltinTokenizer = (typeof BUILTIN_TOKENIZERS)[number];
/**
* History classification for text spans in merge results.
*
* Indicates the origin of each text span in the merged document.
*/
export type History = (typeof HISTORY_VALUES)[number];
/**
* Represents a text document with associated cursor positions.
*
* This interface is used both as input to reconcile functions (to specify where
* cursors are positioned in the original documents) and as output (with cursors
* automatically repositioned after merging).
*/
export interface TextWithCursors {
/** The document's entire content as a string */
text: string;
/**
* Array of cursor positions within the text. Can be empty if there are no cursors to track.
* Each cursor has a unique ID and position.
*/
cursors: CursorPosition[];
}
/**
* Represents a text document with associated cursor positions.
*
* This interface is used both as input to reconcile functions (to specify where
* cursors are positioned in the original documents) and as output (with cursors
* automatically repositioned after merging).
*/
export interface TextWithOptionalCursors {
/** The document's entire content as a string */
text: string;
/**
* Array of cursor positions within the text. Can be null, undefined, or empty
* if there are no cursors to track. Each cursor has a unique ID and position.
*/
cursors: null | undefined | CursorPosition[];
}
/**
* Represents a cursor position within a text document.
*
* Cursors are automatically repositioned during text merging to maintain their
* relative positions as text is inserted, deleted, or modified around them.
*/
export interface CursorPosition {
/** Unique identifier for the cursor (can be any number, must be unique within the document) */
id: number;
/** Character position in the text, 0-based index from the beginning of the document */
position: number;
}
/**
* Represents a merged text document with cursor positions and detailed change history.
*
* This is the return type of `reconcileWithHistory()` and provides complete information
* about how the merge was performed, including which parts of the final text came from
* which source documents.
*/
export interface TextWithCursorsAndHistory {
/** The merged document's entire content */
text: string;
/**
* Array of cursor positions within the merged text. Can empty if there are no cursors to track.
* All cursors are automatically repositioned from the left and right documents.
*/
cursors: CursorPosition[];
/**
* Detailed provenance information showing the origin of each text span in the result.
* Each span indicates whether it was unchanged, added from left, added from right, etc.
*/
history: SpanWithHistory[];
}
/**
* Represents a span of text in the merged result with its change history.
*
* This shows exactly which source document contributed each piece of text to the
* final merged result. Useful for understanding merge decisions and creating
* visualisations of how documents were combined.
*/
export interface SpanWithHistory {
/** The text content of this span */
text: string;
/** The origin of this text span in the merge result */
history: History;
}
const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer. Only ${BUILTIN_TOKENIZERS.join(
', '
)} are supported.`;
let isInitialised = false;
/**
* Merges three versions of text using intelligent conflict resolution.
*
* This is the primary function for 3-way text merging. Unlike traditional merge tools
* that produce conflict markers, this function automatically resolves conflicts by
* applying both sets of changes where possible.
*
* @param original - The original/base version of the text that both sides diverged from
* @param left - The left version of the text (either string or TextWithCursors with cursor positions)
* @param right - The right version of the text (either string or TextWithCursors with cursor positions)
* @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
* "Character" (fine-grained), or "Line" (similar to git merge)
* @returns The reconciled text with automatically repositioned cursor positions
*
* @example
* ```typescript
* const original = "Hello world";
* const left = "Hello beautiful world"; // Added "beautiful"
* const right = "Hi world"; // Changed "Hello" to "Hi"
*
* const result = reconcile(original, left, right);
* console.log(result.text); // "Hi beautiful world"
* ```
*/
export function reconcile(
original: string,
left: string | TextWithOptionalCursors,
right: string | TextWithOptionalCursors,
tokenizer: BuiltinTokenizer = 'Word'
): TextWithCursors {
init();
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
const leftCursor = toWasmTextWithCursors(left);
const rightCursor = toWasmTextWithCursors(right);
const result = wasmReconcile(original, leftCursor, rightCursor, tokenizer);
leftCursor.free();
rightCursor.free();
const jsResult = toTextWithCursors(result);
result.free();
return jsResult;
}
/**
* Generates a compact diff representation between an original and changed text.
*
* These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
* Cursor positions are omitted from the diff result.
*
* This function computes the differences between two versions of text and returns
* a compact representation of those changes.
*
* @param original - The original/base version of the text
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
* @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
*/
export function diff(
original: string,
changed: string | TextWithOptionalCursors,
tokenizer: BuiltinTokenizer = 'Word'
): Array<number | string> {
init();
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
const changedWasm = toWasmTextWithCursors(changed);
const result = wasmDiff(original, changedWasm, tokenizer);
changedWasm.free();
return result;
}
/**
* Applies a compact diff to an original text to reconstruct the changed version.
*
* This function takes an original text and a compact diff representation (as produced
* by the `diff` function) and reconstructs the modified text.
*
* @param original - The original/base version of the text
* @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
* @returns The reconstructed changed text as a string.
*/
export function undiff(
original: string,
diff: Array<number | string>,
tokenizer: BuiltinTokenizer = 'Word'
): string {
init();
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
return wasmUndiff(original, diff, tokenizer);
}
/**
* Merges three versions of text and returns detailed provenance information.
*
* This function behaves identically to `reconcile()` but additionally provides
* detailed historical information about the origin of each text span in the result.
* This is valuable for understanding how the merge was performed and which changes
* came from which source.
*
* Note: Computing the history is computationally more expensive than the basic merge.
*
* @param original - The original/base version of the text that both sides diverged from
* @param left - The left version of the text (either string or TextWithCursors with cursor positions)
* @param right - The right version of the text (either string or TextWithCursors with cursor positions)
* @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
* "Character" (fine-grained), or "Line" (similar to git merge)
* @returns The reconciled text with cursor positions and detailed change history
*
* @example
* ```typescript
* const original = "Hello world";
* const left = "Hello beautiful world";
* const right = "Hi world";
*
* const result = reconcileWithHistory(original, left, right);
* console.log(result.text); // "Hi beautiful world"
* console.log(result.history); // Array of SpanWithHistory objects showing change origins
* ```
*/
export function reconcileWithHistory(
original: string,
left: string | TextWithOptionalCursors,
right: string | TextWithOptionalCursors,
tokenizer: BuiltinTokenizer = 'Word'
): TextWithCursorsAndHistory {
init();
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
const leftCursor = toWasmTextWithCursors(left);
const rightCursor = toWasmTextWithCursors(right);
const result = wasmReconcileWithHistory(original, leftCursor, rightCursor, tokenizer);
leftCursor.free();
rightCursor.free();
const jsResult = toTextWithCursors(result);
const history = result.history().map(toSpanWithHistory);
result.free();
return {
...jsResult,
history,
};
}
function init() {
if (isInitialised) {
return;
}
const wasmBinary = Uint8Array.from(atob(wasmBytes as unknown as string), (c) =>
c.charCodeAt(0)
);
initSync({ module: wasmBinary });
isInitialised = true;
}
function toWasmTextWithCursors(
text: string | TextWithOptionalCursors
): wasmTextWithCursors {
const isInputString = typeof text === 'string';
const leftText = isInputString ? text : text.text;
const leftCursors = isInputString ? [] : (text.cursors ?? []);
return new wasmTextWithCursors(leftText, leftCursors.map(toWasmCursorPosition));
}
function toWasmCursorPosition({ id, position }: CursorPosition): wasmCursorPosition {
return new wasmCursorPosition(id, position);
}
function toTextWithCursors(textWithCursor: wasmTextWithCursors): TextWithCursors {
return {
text: textWithCursor.text(),
cursors: textWithCursor.cursors().map(toCursorPosition),
};
}
function toCursorPosition(cursor: wasmCursorPosition): CursorPosition {
return {
id: cursor.id(),
position: cursor.characterIndex(),
};
}
function toSpanWithHistory(textWithHistory: wasmSpanWithHistory): SpanWithHistory {
return {
text: textWithHistory.text(),
history: textWithHistory.history(),
};
}