From 55b37039ef4ce0776522358cb2cc7ba54919307d Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sat, 12 Jul 2025 21:58:05 +0100 Subject: [PATCH] Improve docs --- README.md | 127 ++++++++------------ docs/advanced-ts.md | 70 +++++++++++ examples/website/src/index.ts | 16 ++- src/lib.rs | 83 ++++++++----- src/operation_transformation/edited_text.rs | 34 ++++++ 5 files changed, 225 insertions(+), 105 deletions(-) create mode 100644 docs/advanced-ts.md diff --git a/README.md b/README.md index 45481c9..a659488 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,20 @@ -# Reconcile-text: 3-way text merging with automatic conflict resolution +# `reconcile-text`: conflict-free 3-way text merging -A library for merging conflicting text edits without manual intervention. Unlike traditional 3-way merge tools that produce conflict markers, `reconcile-text` automatically resolves conflicts by applying both sets of changes where possible using algorithms inspired by Operational Transformation. +A Rust and TypeScript library for merging conflicting text edits without manual intervention. Unlike traditional 3-way merge tools that produce conflict markers, `reconcile-text` automatically resolves conflicts by applying both sets of changes (while updating cursor positions) using an algorithm inspired by Operational Transformation. -**[Try the interactive demo](https://schmelczer.dev/reconcile)** to see it in action. +## Try it -Find it on: +✨ **[Try the interactive demo](https://schmelczer.dev/reconcile)** to see it in action! 
-- [reconcile-text on crates.io](https://crates.io/crates/reconcile-text) -- [reconcile-text on NPM](https://www.npmjs.com/package/reconcile-text) +### Install it in your project + +- `cargo add reconcile-text` ([reconcile-text on crates.io](https://crates.io/crates/reconcile-text)) +- `npm install reconcile-text` ([reconcile-text on NPM](https://www.npmjs.com/package/reconcile-text)) ## Key features - **No conflict markers** — Clean, merged output without Git's `<<<<<<<` markers -- **Cursor tracking** — Automatically repositions cursors and selections during merging +- **Cursor tracking** — Automatically repositions cursors (and selections) during merging - **Flexible tokenisation** — Word-level (default), character-level, or custom strategies - **Unicode support** — Full UTF-8 support with proper handling of complex scripts - **Cross-platform** — Native Rust performance with WebAssembly for JavaScript @@ -21,11 +23,16 @@ Find it on: ### Rust -Run `cargo add reconcile-text` or add `reconcile-text` to your `Cargo.toml`: +Install via crates.io: +```sh +cargo add reconcile-text +``` + +or add `reconcile-text` to your `Cargo.toml`: ```toml [dependencies] -reconcile-text = "0.4" +reconcile-text = "0.5" ``` Then merge away: @@ -35,30 +42,33 @@ use reconcile_text::{reconcile, BuiltinTokenizer}; // Start with original text let parent = "Hello world"; -// Two people edit simultaneously +// Two users edit simultaneously let left = "Hello beautiful world"; // Added "beautiful" let right = "Hi world"; // Changed "Hello" to "Hi" -// Reconcile combines both changes intelligently +// Reconcile combines both changes let result = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word); assert_eq!(result.apply().text(), "Hi beautiful world"); ``` +See [merge-file](examples/merge-file.rs) for another example or the [library's documentation](https://docs.rs/reconcile-text/latest/reconcile_text). 
+ ### JavaScript/TypeScript -Install via npm: +Install via NPM: -```bash +```sh npm install reconcile-text ``` -Then use in your application: +Then use it in your application: ```javascript import { reconcile } from 'reconcile-text'; -// Same example as above +// Start with original text const parent = 'Hello world'; +// Two users edit simultaneously const left = 'Hello beautiful world'; const right = 'Hi world'; @@ -66,86 +76,51 @@ const result = reconcile(parent, left, right); console.log(result.text); // "Hi beautiful world" ``` -## Advanced usage +See the [example website](examples/website/src/index.ts) for a more complex example or the [advanced examples document](https://github.com/schmelczer/reconcile/blob/main/docs/advanced-ts.md). -### Edit provenance +## Motivation -Track which changes came from where using `reconcileWithHistory`: +Collaborative editing presents the challenge of merging conflicting changes when multiple users edit documents simultaneously (or offline). Traditional solutions like Conflict-free Replicated Data Types (CRDTs) or Operational Transformation (OT) work well when you control the entire editing environment and can capture every operation ([1]). However, many workflows involve users editing with different tools — for example, Obsidian users editing Markdown files with various editors from Vim to VS Code. -```javascript -const result = reconcileWithHistory(parent, left, right); -console.log(result.history); // Detailed breakdown of each text span's origin -``` +This creates **Differential Synchronisation** scenarios ([2], [3]): we only know the final state of each document, not the sequence of operations that produced it. This is the same challenge Git addresses, but Git requires manual conflict resolution. The key insight is that while incorrect merges in source code can introduce bugs, human text is more forgiving. A slightly imperfect sentence is often preferable to conflict markers interrupting the flow. 
-### Tokenisation strategies +> **Note**: Some text domains require more careful handling. Legal contracts, for instance, could have unintended meaning changes from conflicting edits that create double-negations. At the same time, semantic conflicts can still arise when merging code, even in the absence of syntactical conflicts. -Reconcile offers different ways to split text for merging: - -- **Word tokeniser** (`BuiltinTokenizer::Word`) — Splits on word boundaries (recommended for prose) -- **Character tokeniser** (`BuiltinTokenizer::Character`) — Individual characters (fine-grained control) -- **Line tokeniser** (`BuiltinTokenizer::Line`) — Line-by-line (similar to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file)) -- **Custom tokeniser** — Roll your own for specialised use cases - -### Cursor tracking - -Ideal for collaborative editors — Reconcile automatically tracks cursor positions through merges: - -```javascript -const result = reconcile( - 'Hello world', - { - text: 'Hello beautiful world', - cursors: [{ id: 1, position: 6 }], // After "Hello " - }, - { - text: 'Hi world', - cursors: [{ id: 2, position: 0 }], // At the beginning - } -); - -// Result: "Hi beautiful world" with repositioned cursors -console.log(result.text); // "Hi beautiful world" -console.log(result.cursors); // [{ id: 1, position: 3 }, { id: 2, position: 0 }] -``` +Differential sync is implemented by [universal-sync](https://github.com/invisible-college/universal-sync) and my Obsidian plugin, [vault-link](https://github.com/schmelczer/vault-link), and it requires a merging tool which creates conflict-free results for the best user experience. ## How it works -Reconcile builds upon the foundation of `diff3` but adds intelligent conflict resolution. Given a **parent** document and two modified versions (`left` and `right`), here's what happens: +`reconcile-text` starts off similarly to `diff3` ([4], [5]) but adds automated conflict resolution. 
Given a **parent** document and two modified versions (`left` and `right`), the following happens: -1. **Diff computation** — Myers' algorithm calculates differences between (parent ↔ left) and (parent ↔ right) -2. **Tokenisation** — Text splits into meaningful units (words, characters, etc.) for granular merging -3. **Diff optimisation** — Operations are reordered and consolidated to maximise coherent changes -4. **Operational Transformation** — Edits are woven together using OT principles, preserving all modifications +1. **Tokenisation** — Input texts get split into meaningful units (words, characters, etc.) for granular merging +2. **Diff computation** — Myers' algorithm calculates differences between (parent ↔ left) and (parent ↔ right) +3. **Diff optimisation** — Operations are reordered and consolidated to maximise chained changes +4. **Operational Transformation** — Edits are woven together using OT principles, preserving all modifications and updating cursors -Whilst Reconcile's primary goal isn't implementing Operational Transformation, OT provides an elegant way to merge Myers' diff output. The same could be achieved with CRDTs, though the quality depends entirely on the underlying 2-way diffs. Note that `move` operations aren't supported, as Myers' algorithm decomposes them into separate `insert` and `delete` operations. +While the primary goal of `reconcile-text` isn't to implement OT (you can check out [operational-transform-rs](https://github.com/spebern/operational-transform-rs) for a Rust implementation of it), OT provides an elegant way to merge Myers' diff outputs. The same could be achieved with CRDTs which many libraries implement well for text: see [Loro](https://github.com/loro-dev/loro/), [cola](https://github.com/nomad/cola), and [automerge](https://github.com/automerge/automerge) as a few great examples. 
-## Background - -Collaborative editing presents the challenge of merging conflicting changes when multiple users edit documents simultaneously, or when synchronising edits across devices. - -Traditional solutions like CRDTs or Operational Transformation work well when you control the entire editing environment and can capture every operation. However, many workflows involve users editing with different tools — for example, Obsidian users editing Markdown files with various editors from Vim to Word. - -This creates **Differential Synchronisation** scenarios [¹]: you only know the final state of each document, not the sequence of operations that produced it. This is the same challenge Git addresses, but Git requires manual conflict resolution. - -The key insight is that whilst incorrect merges in source code can introduce bugs, human text is more forgiving. A slightly imperfect sentence is often preferable to conflict markers interrupting the flow. - -> **Note**: Some text domains require more careful handling. Legal contracts, for instance, could have unintended meaning changes from conflicting edits that create double-negations. +However, the quality of a merge, if only the end result of concurrent changes is observable, depends entirely on the quality of the underlying 2-way diffs. For instance, `move` operations can't be supported as Myers' algorithm decomposes them into separate `insert` and `delete` operations regardless of the merging algorithm. ## Development -### Prerequisites +Contributions are welcome! + +### Environment #### Node.js setup 1. Install [nvm](https://github.com/nvm-sh/nvm): - ```bash + ```sh curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash ``` 2. Install and use Node 22: - ```bash + ```sh nvm install 22 && nvm use 22 ``` -3. Optionally set as default: `nvm alias default 22` +3. 
Optionally set as default: + ```sh + nvm alias default 22 + ``` #### Rust toolchain @@ -158,7 +133,7 @@ The key insight is that whilst incorrect merges in source code can introduce bug cargo install wasm-pack cargo-insta cargo-edit ``` -### Development scripts +### Scripts - **Run tests**: `scripts/test.sh` - **Lint and format**: `scripts/lint.sh` @@ -170,4 +145,8 @@ The key insight is that whilst incorrect merges in source code can introduce bug [MIT](./LICENSE) -[¹]: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/35605.pdf +[1]: https://marijnhaverbeke.nl/blog/collaborative-editing-cm.html +[2]: https://neil.fraser.name/writing/sync/ +[3]: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/35605.pdf +[4]: https://blog.jcoglan.com/2017/05/08/merging-with-diff3/ +[5]: https://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf diff --git a/docs/advanced-ts.md b/docs/advanced-ts.md new file mode 100644 index 0000000..b4b03d8 --- /dev/null +++ b/docs/advanced-ts.md @@ -0,0 +1,70 @@ +# Advanced usage (TypeScript) + +## Edit provenance + +Track which changes came from where using `reconcileWithHistory`: + +```javascript +const result = reconcileWithHistory( + 'Hello world', + 'Hello beautiful world', + 'Hi world' +); + +console.log(result.text); // "Hi beautiful world" +console.log(result.history); /* +[ + { + "text": "Hello", + "history": "RemovedFromRight" + }, + { + "text": "Hi", + "history": "AddedFromRight" + }, + { + "text": " beautiful", + "history": "AddedFromLeft" + }, + { + "text": " ", + "history": "Unchanged" + }, + { + "text": "world", + "history": "Unchanged" + } +] +*/ +``` + +## Tokenisation strategies + +Reconcile offers different ways to split text for merging: + +- **Word tokeniser** (`"Word"`) — Splits on word boundaries (recommended for prose) +- **Character tokeniser** (`"Character"`) — Individual characters (fine-grained control) +- **Line tokeniser** (`"Line"`) — Line-by-line (similar 
to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file)) + +## Cursor tracking + +Reconcile automatically tracks cursor positions through merges which is handy in a collaborative editor. + +```javascript +const result = reconcile( + 'Hello world', + { + text: 'Hello beautiful world', + cursors: [{ id: 1, position: 6 }], // After "Hello " + }, + { + text: 'Hi world', + cursors: [{ id: 2, position: 0 }], // At the beginning + } +); + +// Result: "Hi beautiful world" with repositioned cursors +console.log(result.text); // "Hi beautiful world" +console.log(result.cursors); // [{ id: 2, position: 0 }, { id: 1, position: 3 }] +``` +> The `cursors` list is sorted by the character position (not id-s). diff --git a/examples/website/src/index.ts b/examples/website/src/index.ts index c9fcb46..43f1a3b 100644 --- a/examples/website/src/index.ts +++ b/examples/website/src/index.ts @@ -1,4 +1,4 @@ -import { reconcileWithHistory } from 'reconcile-text'; +import { reconcile, reconcileWithHistory } from 'reconcile-text'; import type { BuiltinTokenizer } from 'reconcile-text'; import './style.scss'; @@ -22,6 +22,20 @@ async function main(): Promise { leftTextArea.addEventListener('select', updateMergedText); rightTextArea.addEventListener('select', updateMergedText); + console.info( + reconcile( + 'Hello world', + { + text: 'Hello beautiful world', + cursors: [{ id: 1, position: 6 }], // After "Hello " + }, + { + text: 'Hi world', + cursors: [{ id: 2, position: 0 }], // At the beginning + } + ) + ); + window.addEventListener('resize', resizeTextAreas); tokenizerRadios.forEach((radio) => { diff --git a/src/lib.rs b/src/lib.rs index 424615a..aed8ede 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,14 @@ -//! # Reconcile: 3-way text merging with automatic conflict resolution +//! # Reconcile: conflict-free 3-way text merging //! //! A library for merging conflicting text edits without manual intervention. -//! 
Unlike traditional 3-way merge tools that produce conflict markers, this -//! library automatically resolves conflicts by applying both sets of changes -//! where possible. +//! Unlike traditional 3-way merge tools that produce conflict markers, +//! reconcile-text automatically resolves conflicts by applying both sets of +//! changes (while updating cursor positions) using an algorithm inspired by +//! Operational Transformation. //! -//! Based on a combination of Myers' diff algorithm and Operational -//! Transformation principles, it's designed for scenarios where you have a -//! common parent text and two modified versions that need to be intelligently -//! combined. +//! ✨ **[Try the interactive demo](https://schmelczer.dev/reconcile)** to see it in action. //! -//! **[Try the interactive demo](https://schmelczer.dev/reconcile)** to see it in action. -//! -//! ## Basic usage +//! ## Simple example //! //! ``` //! use reconcile_text::{reconcile, BuiltinTokenizer}; @@ -30,8 +26,8 @@ //! //! ## Tokenisation strategies //! -//! Merging operates at the token level, where you control the granularity. -//! The choice of tokeniser significantly affects merge quality and behaviour. +//! Merging happens at the token level, and the choice of tokeniser +//! significantly affects merge quality and behaviour. //! //! ### Built-in tokenisers //! @@ -56,18 +52,21 @@ //! // Line-level tokenisation (similar to git merge) //! let result = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Line); //! // Line-level produces different results as it treats each line as atomic +//! assert_eq!(result.apply().text(), "The quick red foxThe very quick brown fox\njumps over the lazy dog"); //! ``` //! //! ### Custom tokenisation //! -//! For specialised use cases, implement custom tokenisation logic: +//! For specialised use cases, such as structured languages, a custom +//! tokenisation logic can be implemented by providing a function with the +//! 
signature `Fn(&str) -> Vec<Token<String>>`: //! //! ``` //! use reconcile_text::{reconcile, Token, BuiltinTokenizer}; //! //! // Example: sentence-based tokeniser function //! let sentence_tokeniser = |text: &str| { -//! text.split(". ") +//! text.split_inclusive(". ") //! .map(|sentence| Token::new( //! sentence.to_string(), //! sentence.to_string(), @@ -82,7 +81,7 @@ //! let right = "Hello world. This is a great test."; // Changed "a" to "great" //! //! // For most cases, the built-in word tokeniser works well -//! let result = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word); +//! let result = reconcile(parent, &left.into(), &right.into(), &sentence_tokeniser); //! assert_eq!(result.apply().text(), "Hello beautiful world. This is a great test."); //! ``` //! @@ -118,28 +117,52 @@ //! // Cursor 1 moves from position 6 to position 3 (after "Hi ") //! // Cursor 2 stays at position 0 (beginning) //! ``` +//! > The `cursors` list is sorted by the character position (not id-s). +//! +//! ## Change provenance +//! +//! Track which changes came from where: +//! +//! ```rust +//! use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile}; +//! +//! let parent = "Merging text is hard!"; +//! let left = "Merging text is easy!"; // Changed "hard" to "easy" +//! let right = "With reconcile, merging documents is hard!"; // Added prefix and changed word +//! +//! let result = reconcile( +//! parent, +//! &left.into(), +//! &right.into(), +//! &*BuiltinTokenizer::Word, +//! ); +//! +//! assert_eq!( +//! result.apply_with_history(), +//! vec![ +//! SpanWithHistory::new("Merging text".to_string(), History::RemovedFromRight), +//! SpanWithHistory::new( +//! "With reconcile, merging documents".to_string(), +//! History::AddedFromRight +//! ), +//! SpanWithHistory::new(" ".to_string(), History::Unchanged), +//! SpanWithHistory::new("is".to_string(), History::Unchanged), +//! SpanWithHistory::new(" hard!".to_string(), History::RemovedFromLeft), +//! 
SpanWithHistory::new(" easy!".to_string(), History::AddedFromLeft), +//! ] +//! ); +//! ``` //! //! ## Error handling //! //! The library is designed to be robust and will always produce a result, even -//! in edge cases. However, be aware that: -//! -//! - Binary data is detected and handled gracefully -//! - Unicode text is fully supported -//! - Extremely large diffs may have performance implications +//! in edge cases. However, be aware that extremely large diffs may have +//! performance implications. //! //! ## Algorithm overview //! -//! 1. **Diff computation**: Myers' algorithm calculates differences between -//! parent↔left and parent↔right -//! 2. **Tokenisation**: Text is split into meaningful units (words, characters, -//! etc.) -//! 3. **Diff optimisation**: Operations are reordered and consolidated for -//! coherent changes -//! 4. **Operational Transformation**: Edits are combined using OT principles -//! //! For detailed algorithm explanation, see the -//! [README](README.md#how-it-works). +//! [README](https://github.com/schmelczer/reconcile/blob/main/README.md#how-it-works). mod operation_transformation; mod raw_operation; diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs index 4a79fb8..174cfaa 100644 --- a/src/operation_transformation/edited_text.rs +++ b/src/operation_transformation/edited_text.rs @@ -248,6 +248,40 @@ where TextWithCursors::new(builder.take(), self.cursors.clone()) } + /// Apply the operations to the text and return the resulting text in chunks + /// together with the provenance describing where each chunk came from. + /// + /// The result includes deleted spans as well. 
+ /// + /// ``` + /// use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile}; + /// + /// let parent = "Merging text is hard!"; + /// let left = "Merging text is easy!"; // Changed "hard" to "easy" + /// let right = "With reconcile, merging documents is hard!"; // Added prefix and changed word + /// + /// let result = reconcile( + /// parent, + /// &left.into(), + /// &right.into(), + /// &*BuiltinTokenizer::Word, + /// ); + /// + /// assert_eq!( + /// result.apply_with_history(), + /// vec![ + /// SpanWithHistory::new("Merging text".to_string(), History::RemovedFromRight,), + /// SpanWithHistory::new( + /// "With reconcile, merging documents".to_string(), + /// History::AddedFromRight, + /// ), + /// SpanWithHistory::new(" ".to_string(), History::Unchanged,), + /// SpanWithHistory::new("is".to_string(), History::Unchanged,), + /// SpanWithHistory::new(" hard!".to_string(), History::RemovedFromLeft,), + /// SpanWithHistory::new(" easy!".to_string(), History::AddedFromLeft,), + /// ] + /// ); + /// ``` #[must_use] pub fn apply_with_history(&self) -> Vec { let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);