From 8bd803c9b2ab771d5281fde67b43d38192615575 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Fri, 4 Jul 2025 03:14:18 +0100 Subject: [PATCH] Rename TextWithHistory to SpanWithHistory --- reconcile-js/src/index.ts | 14 ++-- src/lib.rs | 84 +++++++++++++++++-- src/operation_transformation/edited_text.rs | 14 ++-- src/tokenizer.rs | 4 +- src/types.rs | 2 +- ...t_with_history.rs => span_with_history.rs} | 6 +- src/wasm.rs | 6 +- 7 files changed, 100 insertions(+), 30 deletions(-) rename src/types/{text_with_history.rs => span_with_history.rs} (84%) diff --git a/reconcile-js/src/index.ts b/reconcile-js/src/index.ts index e9c769b..a2d66cb 100644 --- a/reconcile-js/src/index.ts +++ b/reconcile-js/src/index.ts @@ -2,7 +2,7 @@ import wasmInit, { CursorPosition as wasmCursorPosition, reconcile as wasmReconcile, TextWithCursors as wasmTextWithCursors, - TextWithHistory as wasmTextWithHistory, + SpanWithHistory as wasmSpanWithHistory, BuiltinTokenizer, reconcileWithHistory as wasmReconcileWithHistory, History, @@ -32,10 +32,10 @@ export interface TextWithCursorsAndHistory { /** List of cursor positions, can be null or undefined if there are no cursors */ cursors: null | undefined | CursorPosition[]; /** List of operations leading to `text` from the 3 ancestors */ - history: TextWithHistory[]; + history: SpanWithHistory[]; } -export interface TextWithHistory { +export interface SpanWithHistory { /** Span of text associated with the historical opearion */ text: string; /** Origin of the `text` span */ @@ -139,7 +139,7 @@ export function reconcileWithHistory( rightCursor.free(); const jsResult = toTextWithCursors(result); - const history = result.history().map(toTextWithHistory); + const history = result.history().map(toSpanWithHistory); result.free(); return { @@ -184,9 +184,9 @@ function toCursorPosition(cursor: wasmCursorPosition): CursorPosition { }; } -function toTextWithHistory( - textWithHistory: wasmTextWithHistory -): TextWithHistory { +function 
toSpanWithHistory( + textWithHistory: wasmSpanWithHistory +): SpanWithHistory { return { text: textWithHistory.text(), history: textWithHistory.history(), diff --git a/src/lib.rs b/src/lib.rs index 4ed5087..bf49649 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,86 @@ -#![feature(stmt_expr_attributes)] +//! # Reconcile +//! +//! A library for automatically merging two conflicting versions of a +//! document. `Reconcile` is essentially `git merge` but without any conflict +//! markers (or lost edits) in the output. +//! +//! ``` +//! use reconcile::{reconcile, BuiltinTokenizer}; +//! +//! let parent = "Merging text is hard!"; +//! let left = "Merging text is easy!"; +//! let right = "With reconcile, merging documents is hard!"; +//! +//! let deconflicted = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word); +//! assert_eq!(deconflicted.apply().text(), "With reconcile, merging documents is easy!"); +//! ``` +//! > You can also try out an interactive demo at [schmelczer.dev/reconcile](https://schmelczer.dev/reconcile). +//! +//! ## Tokenizing +//! +//! Merging is done on the token level, the granularity of which is +//! configurable. By default, words are the atoms for merging and thus words +//! can't get jumbled up at the end of reconciling. However, to maintain +//! grammatical correctness after merging, we could choose to treat individual +//! sentences as tokens: +//! +//! ``` +//! ``` +//! +//! > Beware, that if conflicting edits happen within a sentence (therefore each +//! > creating a new token), the sentences will appear duplicated. +//! +//! ``` +//! ``` +//! +//! If finer grained merging is required, we can make every UTF-8 character +//! become its own token: +//! +//! +//! If something custom is needed, for instance, to better support structured +//! text such as Markdown or HTML, a custom tokenizer can be implemented +//! +//! +//! ## Cursors and selection ranges +//! +//! Additionally, it supports updating cursor & +//! 
selection ranges during the merging too for interactive workflows. +//! +//! +//! ## The algorithm +//! +//! The algorithm starts similarly to `diff3`. Its inputs are a **Parent** +//! document `P` and two conflicting versions: `left` and `right` which have +//! been created from `P` through any series of concurrent edits. When calling +//! `reconcile(parent, left, right)`, first, the 2-way diffs of (`parent` & +//! `left`) and (`parent` & `right`) are taken using Myers' algorithm. +//! +//! The +//! +//! Then, the +//! resulting edits are woven together using the principles of operational +//! transformations ensuring that no change from either `left` or `right` is +//! lost: if either inserted some text, that string will end up in the result +//! and similarly for deletes. +//! +//! The +//! +//! The `reconcile` library +//! -mod diffs; mod operation_transformation; +mod raw_operation; mod tokenizer; +mod types; mod utils; -pub use operation_transformation::{ - CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors, - reconcile_with_history, reconcile_with_tokenizer, +pub use operation_transformation::{EditedText, reconcile}; +pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token}; +pub use types::{ + cursor_position::CursorPosition, history::History, side::Side, + span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors, }; -pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer}; -pub use utils::{history::History, side::Side}; +pub use utils::is_binary::is_binary; #[cfg(feature = "wasm")] pub mod wasm; diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs index c287903..7b7421a 100644 --- a/src/operation_transformation/edited_text.rs +++ b/src/operation_transformation/edited_text.rs @@ -11,7 +11,7 @@ use crate::{ }, raw_operation::RawOperation, tokenizer::Tokenizer, - types::{history::History, side::Side, text_with_history::TextWithHistory}, + 
types::{history::History, side::Side, span_with_history::SpanWithHistory}, utils::string_builder::StringBuilder, }; @@ -231,7 +231,7 @@ where } #[must_use] - pub fn apply_with_history(&self) -> Vec { + pub fn apply_with_history(&self) -> Vec { let mut builder: StringBuilder<'_> = StringBuilder::new(self.text); let mut history = Vec::with_capacity(self.operations.len()); @@ -241,13 +241,13 @@ where match operation { Operation::Equal { .. } => { - history.push(TextWithHistory::new(History::Unchanged, builder.take())); + history.push(SpanWithHistory::new(History::Unchanged, builder.take())); } Operation::Insert { side, .. } => match side { Side::Left => { - history.push(TextWithHistory::new(History::AddedFromLeft, builder.take())); + history.push(SpanWithHistory::new(History::AddedFromLeft, builder.take())); } - Side::Right => history.push(TextWithHistory::new( + Side::Right => history.push(SpanWithHistory::new( History::AddedFromRight, builder.take(), )), @@ -261,10 +261,10 @@ where let deleted = self.text[*order..*order + *deleted_character_count].to_string(); match side { Side::Left => { - history.push(TextWithHistory::new(History::RemovedFromLeft, deleted)); + history.push(SpanWithHistory::new(History::RemovedFromLeft, deleted)); } Side::Right => { - history.push(TextWithHistory::new(History::RemovedFromRight, deleted)); + history.push(SpanWithHistory::new(History::RemovedFromRight, deleted)); } } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b2b9065..b8c8e0f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,5 @@ -mod word_tokenizer; mod character_tokenizer; +mod word_tokenizer; use std::ops::Deref; @@ -36,7 +36,7 @@ impl Deref for BuiltinTokenizer { fn deref(&self) -> &Self::Target { match self { - BuiltinTokenizer::Character =>&character_tokenizer::character_tokenizer, + BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer, BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer, #[cfg(feature = "wasm")] 
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"), diff --git a/src/types.rs b/src/types.rs index b151312..b32ef9a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,5 +1,5 @@ pub mod cursor_position; pub mod history; pub mod side; +pub mod span_with_history; pub mod text_with_cursors; -pub mod text_with_history; diff --git a/src/types/text_with_history.rs b/src/types/span_with_history.rs similarity index 84% rename from src/types/text_with_history.rs rename to src/types/span_with_history.rs index b176178..90826c6 100644 --- a/src/types/text_with_history.rs +++ b/src/types/span_with_history.rs @@ -9,15 +9,15 @@ use crate::types::history::History; #[cfg_attr(feature = "wasm", wasm_bindgen)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq)] -pub struct TextWithHistory { +pub struct SpanWithHistory { history: History, text: String, } #[cfg_attr(feature = "wasm", wasm_bindgen)] -impl TextWithHistory { +impl SpanWithHistory { #[must_use] - pub fn new(history: History, text: String) -> Self { TextWithHistory { history, text } } + pub fn new(history: History, text: String) -> Self { SpanWithHistory { history, text } } #[must_use] pub fn history(&self) -> History { self.history } diff --git a/src/wasm.rs b/src/wasm.rs index c2831a2..1234af2 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -14,7 +14,7 @@ use core::str; use cfg_if::cfg_if; use wasm_bindgen::prelude::*; -use crate::{BuiltinTokenizer, CursorPosition, TextWithCursors, TextWithHistory}; +use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors}; cfg_if! 
{ if #[cfg(feature = "wee_alloc")] { #[global_allocator] @@ -120,7 +120,7 @@ fn set_panic_hook() { #[derive(Debug, Clone, PartialEq, Default)] pub struct TextWithCursorsAndHistory { text_with_cursors: TextWithCursors, - history: Vec, + history: Vec, } #[wasm_bindgen] @@ -132,5 +132,5 @@ impl TextWithCursorsAndHistory { pub fn cursors(&self) -> Vec { self.text_with_cursors.cursors() } #[must_use] - pub fn history(&self) -> Vec { self.history.clone() } + pub fn history(&self) -> Vec { self.history.clone() } }