* Remove is_binary from API * Format * Rename file * Test with more feature combinations * Don't depend on serde for wasm * Fix lint & tests * Don't unwrap to MAX number * Expose undiff to JS * Add undiff tests * Lint * Change name
346 lines
11 KiB
TypeScript
346 lines
11 KiB
TypeScript
import {
|
|
CursorPosition as wasmCursorPosition,
|
|
reconcile as wasmReconcile,
|
|
TextWithCursors as wasmTextWithCursors,
|
|
SpanWithHistory as wasmSpanWithHistory,
|
|
reconcileWithHistory as wasmReconcileWithHistory,
|
|
diff as wasmDiff,
|
|
undiff as wasmUndiff,
|
|
initSync,
|
|
} from 'reconcile-text';
|
|
|
|
import wasmBytes from 'reconcile-text/reconcile_text_bg.wasm';
|
|
|
|
// Literal values are listed once as readonly tuples; the string-literal union
// types exported below are derived from these arrays instead of being repeated.
const BUILTIN_TOKENIZERS = [
  'Character',
  'Line',
  'Word',
] as const;
const HISTORY_VALUES = [
  'Unchanged',
  'AddedFromLeft',
  'AddedFromRight',
  'RemovedFromLeft',
  'RemovedFromRight',
] as const;
|
|
|
|
/**
 * Name of a tokenisation strategy that can be used when merging text.
 *
 * Each member corresponds to one of the built-in tokenizers available in the
 * underlying WASM module.
 */
export type BuiltinTokenizer = (typeof BUILTIN_TOKENIZERS)[number];
|
|
|
|
/**
 * Classification of a text span in a merge result.
 *
 * Tells where each span of the merged document originated.
 */
export type History = (typeof HISTORY_VALUES)[number];
|
|
|
|
/**
 * A text document together with the cursors placed in it.
 *
 * The same shape is used on the way in (telling the reconcile functions where
 * cursors sit in the original documents) and on the way out (where the cursors
 * have been automatically repositioned after merging).
 */
export interface TextWithCursors {
  /** Full content of the document as a single string */
  text: string;

  /**
   * Cursor positions within `text`; an empty array means there are no cursors
   * to track. Every cursor carries a unique ID and a position.
   */
  cursors: CursorPosition[];
}
|
|
|
|
/**
 * Input-only variant of `TextWithCursors` in which the cursor list may be left out.
 *
 * This is the object form accepted by `reconcile()`, `diff()` and
 * `reconcileWithHistory()` for documents that may or may not have cursors to
 * track. Results are always returned with a concrete cursor array instead.
 */
export interface TextWithOptionalCursors {
  /** The document's entire content as a string */
  text: string;

  /**
   * Array of cursor positions within the text. May be omitted, `null`,
   * `undefined`, or empty if there are no cursors to track; all of these are
   * treated as "no cursors". Each cursor has a unique ID and position.
   */
  cursors?: null | undefined | CursorPosition[];
}
|
|
|
|
/**
 * A single cursor inside a text document.
 *
 * During merging, cursors are repositioned automatically so that they keep
 * their relative place while text around them is inserted, deleted, or modified.
 */
export interface CursorPosition {
  /** Identifier of the cursor: any number, as long as it is unique within the document */
  id: number;

  /** 0-based character index of the cursor, counted from the start of the document */
  position: number;
}
|
|
|
|
/**
 * A merged text document with its cursor positions and a detailed change history.
 *
 * Returned by `reconcileWithHistory()`. Besides the merged text it describes how
 * the merge was carried out, including which parts of the final text came from
 * which source document.
 */
export interface TextWithCursorsAndHistory {
  /** Full content of the merged document */
  text: string;

  /**
   * Cursor positions within the merged text; can be empty if there are no
   * cursors to track. Cursors from both the left and the right document are
   * automatically repositioned.
   */
  cursors: CursorPosition[];

  /**
   * Provenance of every text span in the result: each entry states whether the
   * span was unchanged, added from left, added from right, and so on.
   */
  history: SpanWithHistory[];
}
|
|
|
|
/**
 * One span of text in a merged result, tagged with its change history.
 *
 * Shows which source document contributed this piece of the final text. Handy
 * for understanding merge decisions and for visualising how documents were
 * combined.
 */
export interface SpanWithHistory {
  /** Text covered by this span */
  text: string;

  /** Where this span originated in the merge */
  history: History;
}
|
|
|
|
// Message thrown by the public entry points when the tokenizer name is not one
// of the built-in ones; the supported names are listed in the message itself.
const UNSUPPORTED_TOKENIZER_ERROR =
  'Unsupported tokenizer. Only ' + BUILTIN_TOKENIZERS.join(', ') + ' are supported.';

// Set to true by init() once the WASM module has been instantiated.
let isInitialised = false;
|
|
|
|
/**
 * Merges three versions of text using intelligent conflict resolution.
 *
 * This is the primary function for 3-way text merging. Unlike traditional merge tools
 * that produce conflict markers, this function automatically resolves conflicts by
 * applying both sets of changes where possible.
 *
 * @param original - The original/base version of the text that both sides diverged from
 * @param left - The left version of the text (either string or TextWithCursors with cursor positions)
 * @param right - The right version of the text (either string or TextWithCursors with cursor positions)
 * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
 * "Character" (fine-grained), or "Line" (similar to git merge)
 * @returns The reconciled text with automatically repositioned cursor positions
 * @throws Error if `tokenizer` is not one of the built-in tokenizers
 *
 * @example
 * ```typescript
 * const original = "Hello world";
 * const left = "Hello beautiful world"; // Added "beautiful"
 * const right = "Hi world"; // Changed "Hello" to "Hi"
 *
 * const result = reconcile(original, left, right);
 * console.log(result.text); // "Hi beautiful world"
 * ```
 */
export function reconcile(
  original: string,
  left: string | TextWithOptionalCursors,
  right: string | TextWithOptionalCursors,
  tokenizer: BuiltinTokenizer = 'Word'
): TextWithCursors {
  init();

  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
    throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
  }

  // Every WASM-backed object created below is recorded here so that it is
  // released in the `finally` block even when a later step throws.
  const wasmHandles: Array<{ free(): void }> = [];

  try {
    const leftCursor = toWasmTextWithCursors(left);
    wasmHandles.push(leftCursor);

    const rightCursor = toWasmTextWithCursors(right);
    wasmHandles.push(rightCursor);

    const result = wasmReconcile(original, leftCursor, rightCursor, tokenizer);
    wasmHandles.push(result);

    // The result is copied into plain JS values before its handle is freed.
    return toTextWithCursors(result);
  } finally {
    for (const handle of wasmHandles) {
      handle.free();
    }
  }
}
|
|
|
|
/**
 * Generates a compact diff representation between an original and changed text.
 *
 * This function computes the differences between two versions of text and returns
 * a compact representation of those changes. The result can be parsed and unpacked
 * using the `undiff` function or the Rust crate's EditedText::from_diff.
 * Cursor positions are omitted from the diff result.
 *
 * @param original - The original/base version of the text
 * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
 * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
 * @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
 * @throws Error if `tokenizer` is not one of the built-in tokenizers
 */
export function diff(
  original: string,
  changed: string | TextWithOptionalCursors,
  tokenizer: BuiltinTokenizer = 'Word'
): Array<number | string> {
  init();

  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
    throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
  }

  const changedWasm = toWasmTextWithCursors(changed);

  try {
    return wasmDiff(original, changedWasm, tokenizer);
  } finally {
    // Release the WASM-side copy of the input whether or not the diff succeeded.
    changedWasm.free();
  }
}
|
|
|
|
/**
 * Rebuilds the changed text from an original text and a compact diff.
 *
 * The diff is expected in the compact form produced by the `diff` function.
 *
 * @param original - The original/base version of the text
 * @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
 * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
 * @returns The reconstructed changed text as a string.
 */
export function undiff(
  original: string,
  diff: Array<number | string>,
  tokenizer: BuiltinTokenizer = 'Word'
): string {
  init();

  const isSupported = BUILTIN_TOKENIZERS.includes(tokenizer);
  if (isSupported) {
    return wasmUndiff(original, diff, tokenizer);
  }

  throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
|
|
|
|
/**
 * Merges three versions of text and returns detailed provenance information.
 *
 * This function behaves identically to `reconcile()` but additionally provides
 * detailed historical information about the origin of each text span in the result.
 * This is valuable for understanding how the merge was performed and which changes
 * came from which source.
 *
 * Note: Computing the history is computationally more expensive than the basic merge.
 *
 * @param original - The original/base version of the text that both sides diverged from
 * @param left - The left version of the text (either string or TextWithCursors with cursor positions)
 * @param right - The right version of the text (either string or TextWithCursors with cursor positions)
 * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose),
 * "Character" (fine-grained), or "Line" (similar to git merge)
 * @returns The reconciled text with cursor positions and detailed change history
 * @throws Error if `tokenizer` is not one of the built-in tokenizers
 *
 * @example
 * ```typescript
 * const original = "Hello world";
 * const left = "Hello beautiful world";
 * const right = "Hi world";
 *
 * const result = reconcileWithHistory(original, left, right);
 * console.log(result.text); // "Hi beautiful world"
 * console.log(result.history); // Array of SpanWithHistory objects showing change origins
 * ```
 */
export function reconcileWithHistory(
  original: string,
  left: string | TextWithOptionalCursors,
  right: string | TextWithOptionalCursors,
  tokenizer: BuiltinTokenizer = 'Word'
): TextWithCursorsAndHistory {
  init();

  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
    throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
  }

  // Every WASM-backed object created below is recorded here so that it is
  // released in the `finally` block even when a later step throws.
  const wasmHandles: Array<{ free(): void }> = [];

  try {
    const leftCursor = toWasmTextWithCursors(left);
    wasmHandles.push(leftCursor);

    const rightCursor = toWasmTextWithCursors(right);
    wasmHandles.push(rightCursor);

    const result = wasmReconcileWithHistory(original, leftCursor, rightCursor, tokenizer);
    wasmHandles.push(result);

    // Text, cursors and history are copied into plain JS values before the
    // result handle is freed.
    const jsResult = toTextWithCursors(result);
    const history = result.history().map(toSpanWithHistory);

    return {
      ...jsResult,
      history,
    };
  } finally {
    for (const handle of wasmHandles) {
      handle.free();
    }
  }
}
|
|
|
|
/**
 * Instantiates the WASM module on first use; later calls return immediately.
 *
 * `wasmBytes` is decoded with `atob`, so it is treated as a base64 string at
 * runtime (presumably inlined that way by the build; confirm against the
 * bundler configuration). The decoded bytes are handed to `initSync`.
 */
function init() {
  if (!isInitialised) {
    const decoded = atob(wasmBytes as unknown as string);

    // Every character of the decoded string carries one byte of the binary.
    const wasmBinary = new Uint8Array(decoded.length);
    for (let index = 0; index < decoded.length; index += 1) {
      wasmBinary[index] = decoded.charCodeAt(index);
    }

    initSync({ module: wasmBinary });

    // Only mark as initialised after initSync has returned without throwing.
    isInitialised = true;
  }
}
|
|
|
|
/**
 * Builds the WASM-side `TextWithCursors` object for a document given either as
 * a plain string or as an object with an optional cursor list.
 *
 * A plain string, or a missing/`null` cursor list, yields an object without
 * cursors. The caller owns the returned object.
 */
function toWasmTextWithCursors(
  text: string | TextWithOptionalCursors
): wasmTextWithCursors {
  if (typeof text === 'string') {
    return new wasmTextWithCursors(text, []);
  }

  const cursors = text.cursors ?? [];
  return new wasmTextWithCursors(text.text, cursors.map(toWasmCursorPosition));
}
|
|
|
|
/** Creates the WASM-side cursor object for a plain `CursorPosition`. */
function toWasmCursorPosition(cursor: CursorPosition): wasmCursorPosition {
  const { id, position } = cursor;
  return new wasmCursorPosition(id, position);
}
|
|
|
|
/**
 * Copies the text and cursors out of a WASM-side object into a plain
 * `TextWithCursors` value. The input object itself is not freed here.
 */
function toTextWithCursors(textWithCursor: wasmTextWithCursors): TextWithCursors {
  const text = textWithCursor.text();

  // NOTE(review): the cursor objects returned by cursors() are never freed
  // explicitly; confirm whether they need a free() call or are reclaimed
  // automatically.
  const cursors = textWithCursor.cursors().map((cursor) => toCursorPosition(cursor));

  return { text, cursors };
}
|
|
|
|
/**
 * Reads a WASM-side cursor into a plain `CursorPosition`; the cursor's
 * `characterIndex()` becomes `position`.
 */
function toCursorPosition(cursor: wasmCursorPosition): CursorPosition {
  const id = cursor.id();
  const position = cursor.characterIndex();
  return { id, position };
}
|
|
|
|
/** Reads a WASM-side span into a plain `SpanWithHistory` value. */
function toSpanWithHistory(textWithHistory: wasmSpanWithHistory): SpanWithHistory {
  const text = textWithHistory.text();
  const history = textWithHistory.history();
  return { text, history };
}
|