From 4fda83fe171f00b7a74abde8ed424169161dd06c Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 29 Jun 2025 19:03:55 +0100 Subject: [PATCH] Remove the exponential API --- src/operation_transformation.rs | 134 ++++++++++---------- src/operation_transformation/edited_text.rs | 19 +-- src/tokenizer.rs | 39 +++++- src/types/text_with_cursors.rs | 21 ++- src/wasm.rs | 113 +++++++++++------ tests/example_document.rs | 18 +-- tests/test.rs | 38 ++++-- tests/wasm.rs | 16 ++- 8 files changed, 248 insertions(+), 150 deletions(-) diff --git a/src/operation_transformation.rs b/src/operation_transformation.rs index 0ff9405..bc6ff2d 100644 --- a/src/operation_transformation.rs +++ b/src/operation_transformation.rs @@ -1,54 +1,49 @@ -mod cursor; mod edited_text; mod operation; mod utils; use std::fmt::Debug; -pub use cursor::{CursorPosition, TextWithCursors}; pub use edited_text::EditedText; pub use operation::Operation; use crate::{ Tokenizer, - utils::{history::History, side::Side}, + types::{side::Side, text_with_cursors::TextWithCursors}, }; +/// Given an `original` document and two concurrent edits to it, +/// return a document containing all changes from both `left` +/// and `right`. +/// +/// If a span has been inserted in either the `left` or `right` +/// versions, it will be present in the return value. If both sides +/// insert the same span with a common prefix, that prefix will only +/// be present once in the output. +/// +/// Deletes are preserved from both sides. This means that an insert +/// from one side into a deleted span from the other side will result +/// in the removal of the original span but keeping the inserted text. +/// +/// The function supports UTF-8. The arguments are tokenized at the +/// granularity of words. 
+/// +/// ``` +/// use reconcile::{reconcile, BuiltinTokenizer}; +/// +/// let parent = "Merging text is hard!"; +/// let left = "Merging text is easy!"; +/// let right = "With reconcile, merging documents is hard!"; +/// +/// let deconflicted = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word); +/// assert_eq!(deconflicted.apply().text(), "With reconcile, merging documents is easy!"); +/// ``` #[must_use] -pub fn reconcile(original: &str, left: &str, right: &str) -> String { - reconcile_with_cursors(original, left.into(), right.into()) - .text - .to_string() -} - -#[must_use] -pub fn reconcile_with_history(original: &str, left: &str, right: &str) -> Vec<(History, String)> { - let left_operations = EditedText::from_strings(original, left.into(), Side::Left); - let right_operations = EditedText::from_strings(original, right.into(), Side::Right); - - left_operations.merge(right_operations).apply_with_history() -} - -#[must_use] -pub fn reconcile_with_cursors<'a>( +pub fn reconcile<'a, T>( original: &'a str, - left: TextWithCursors<'a>, - right: TextWithCursors<'a>, -) -> TextWithCursors<'static> { - let left_operations = EditedText::from_strings(original, left, Side::Left); - let right_operations = EditedText::from_strings(original, right, Side::Right); - - let merged_operations = left_operations.merge(right_operations); - - TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors) -} - -#[must_use] -pub fn reconcile_with_tokenizer<'a, F, T>( - original: &str, - left: TextWithCursors<'a>, - right: TextWithCursors<'a>, + left: &TextWithCursors, + right: &TextWithCursors, tokenizer: &Tokenizer, -) -> TextWithCursors<'static> +) -> EditedText<'a, T> where T: PartialEq + Clone + Debug, { @@ -57,9 +52,7 @@ where let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer, Side::Right); - let merged_operations = left_operations.merge(right_operations); - - 
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors) + left_operations.merge(right_operations) } #[cfg(test)] @@ -70,13 +63,13 @@ mod test { use test_case::test_matrix; use super::*; - use crate::CursorPosition; + use crate::{BuiltinTokenizer, CursorPosition, types::text_with_cursors::TextWithCursors}; #[test] fn test_cursor_complex() { - let original = "this is some complex text to test cursor positions"; + let original: &'static str = "this is some complex text to test cursor positions"; let left = TextWithCursors::new( - "this is really complex text for testing cursor positions", + "this is really complex text for testing cursor positions".to_owned(), vec![ CursorPosition { id: 0, @@ -89,7 +82,7 @@ mod test { ], ); let right = TextWithCursors::new( - "that was some complex sample to test cursor movements", + "that was some complex sample to test cursor movements".to_owned(), vec![ CursorPosition { id: 2, @@ -102,31 +95,31 @@ mod test { ], ); - let merged = reconcile_with_cursors(original, left, right); - + let merged = reconcile(original, &left, &right, &*BuiltinTokenizer::Word).apply(); assert_eq!( - merged, - TextWithCursors::new( - "that was really complex sample for testing cursor movements", - vec![ - CursorPosition { - id: 2, - char_index: 5 - }, // unchanged - CursorPosition { - id: 0, - char_index: 9 - }, // before "really" - CursorPosition { - id: 1, - char_index: 23 - }, // inside of "s|ample" because "text" got replaced by "sample" - CursorPosition { - id: 3, - char_index: 30 - }, // after "complex sample" - ] - ) + &merged.text(), + "that was really complex sample for testing cursor movements" + ); + assert_eq!( + merged.cursors(), + vec![ + CursorPosition { + id: 2, + char_index: 5 + }, // unchanged + CursorPosition { + id: 0, + char_index: 9 + }, // before "really" + CursorPosition { + id: 1, + char_index: 23 + }, // inside of "s|ample" because "text" got replaced by "sample" + CursorPosition { + id: 3, + char_index: 30 
+ }, // after "complex sample" + ] ); } @@ -174,6 +167,11 @@ mod test { }) .collect::>(); - let _ = reconcile(&contents[0], &contents[1], &contents[2]); + let _ = reconcile( + &contents[0], + &(&contents[1]).into(), + &(&contents[2]).into(), + &*BuiltinTokenizer::Word, + ); } } diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs index 08dcee3..60f32da 100644 --- a/src/operation_transformation/edited_text.rs +++ b/src/operation_transformation/edited_text.rs @@ -4,13 +4,13 @@ use std::fmt::Debug; use serde::{Deserialize, Serialize}; use crate::{ - CursorPosition, TextWithCursors, + BuiltinTokenizer, CursorPosition, TextWithCursors, operation_transformation::{ Operation, utils::{cook_operations::cook_operations, elongate_operations::elongate_operations}, }, raw_operation::RawOperation, - tokenizer::{Tokenizer, word_tokenizer::word_tokenizer}, + tokenizer::Tokenizer, types::{history::History, side::Side, text_with_history::TextWithHistory}, utils::string_builder::StringBuilder, }; @@ -27,6 +27,7 @@ use crate::{ /// in the original text. The cursor positions are updated when the operations /// are applied, so that the cursor positions can be used to restore the /// cursor positions in the updated text. + #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Default)] pub struct EditedText<'a, T> @@ -35,7 +36,7 @@ where { text: &'a str, operations: Vec>, - pub(crate) cursors: Vec, + cursors: Vec, } impl<'a> EditedText<'a, String> { @@ -47,7 +48,7 @@ impl<'a> EditedText<'a, String> { /// whitespaces. #[must_use] pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self { - Self::from_strings_with_tokenizer(original, updated, &word_tokenizer, side) + Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word, side) } } @@ -219,14 +220,14 @@ where /// Apply the operations to the text and return the resulting text. 
#[must_use] - pub fn apply(&self) -> String { + pub fn apply(&self) -> TextWithCursors { let mut builder: StringBuilder<'_> = StringBuilder::new(self.text); for operation in &self.operations { builder = operation.apply(builder); } - builder.take() + TextWithCursors::new(builder.take(), self.cursors.clone()) } #[must_use] @@ -291,7 +292,7 @@ mod tests { insta::assert_debug_snapshot!(operations); let new_right = operations.apply(); - assert_eq!(new_right.to_string(), right); + assert_eq!(new_right.text(), right); } #[test] @@ -303,7 +304,7 @@ mod tests { assert_debug_snapshot!(operations); let new_right = operations.apply(); - assert_eq!(new_right.to_string(), text); + assert_eq!(new_right.text(), text); } #[test] @@ -317,6 +318,6 @@ mod tests { let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right); let operations = operations_1.merge(operations_2); - assert_eq!(operations.apply(), expected); + assert_eq!(operations.apply().text(), expected); } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 608fe93..87de8b5 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,7 +1,44 @@ +mod word_tokenizer; + +use std::ops::Deref; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; use token::Token; +#[cfg(feature = "wasm")] +use wasm_bindgen::prelude::*; pub mod token; -pub mod word_tokenizer; /// A trait for tokenizers that take a string and return a list of tokens. 
pub type Tokenizer = dyn Fn(&str) -> Vec>; + +#[cfg_attr(feature = "wasm", wasm_bindgen)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg(feature = "wasm")] +pub enum BuiltinTokenizer { + Character = "Character", + Word = "Word", +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg(not(feature = "wasm"))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum BuiltinTokenizer { + Character, + Word, +} + +impl Deref for BuiltinTokenizer { + type Target = Tokenizer; + + fn deref(&self) -> &Self::Target { + match self { + BuiltinTokenizer::Character => todo!(), + BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer, + #[cfg(feature = "wasm")] + BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"), + } + } +} diff --git a/src/types/text_with_cursors.rs b/src/types/text_with_cursors.rs index 1007f7a..ac7c18f 100644 --- a/src/types/text_with_cursors.rs +++ b/src/types/text_with_cursors.rs @@ -32,9 +32,6 @@ impl TextWithCursors { #[must_use] pub fn cursors(&self) -> Vec { self.cursors.clone() } - - #[must_use] - pub fn new_owned(text: String, cursors: Vec) -> Self { Self { text, cursors } } } impl<'a> From<&'a str> for TextWithCursors { @@ -45,3 +42,21 @@ impl<'a> From<&'a str> for TextWithCursors { } } } + +impl From<&String> for TextWithCursors { + fn from(text: &String) -> Self { + Self { + text: text.to_owned(), + cursors: Vec::new(), + } + } +} + +impl From for TextWithCursors { + fn from(text: String) -> Self { + Self { + text, + cursors: Vec::new(), + } + } +} diff --git a/src/wasm.rs b/src/wasm.rs index 73bbb86..3e65ab0 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -13,9 +13,40 @@ use core::str; use wasm_bindgen::prelude::*; -use crate::{ - TextWithCursors, TextWithHistory, reconcile, reconcile_with_cursors, reconcile_with_history, -}; +use crate::{BuiltinTokenizer, CursorPosition, TextWithCursors, TextWithHistory}; + +/// WASM wrapper around 
`crate::reconcile` for merging text. +#[wasm_bindgen(js_name = reconcile)] +#[must_use] +pub fn reconcile( + parent: &str, + left: &TextWithCursors, + right: &TextWithCursors, + tokenizer: BuiltinTokenizer, +) -> TextWithCursors { + set_panic_hook(); + + crate::reconcile(parent, left, right, &*tokenizer).apply() +} + +/// WASM wrapper around `crate::reconcile` that returns the merged text and cursors together with the history produced by `apply_with_history`. +#[wasm_bindgen(js_name = reconcileWithHistory)] +#[must_use] +pub fn reconcile_with_history( + parent: &str, + left: &TextWithCursors, + right: &TextWithCursors, + tokenizer: BuiltinTokenizer, +) -> TextWithCursorsAndHistory { + set_panic_hook(); + let reconciled = crate::reconcile(parent, left, right, &*tokenizer); + let text_with_cursors = reconciled.apply(); + + TextWithCursorsAndHistory { + text_with_cursors, + history: reconciled.apply_with_history(), + } +} /// Merge two documents with a common parent. Relies on `reconcile::reconcile` /// for texts and returns the right document as-is if either of the updated @@ -34,56 +65,35 @@ use crate::{ /// # Panics /// /// If any of the input documents are not valid UTF-8 strings. 
-#[wasm_bindgen] +#[wasm_bindgen(js_name = genericReconcile)] #[must_use] -pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec { +pub fn generic_reconcile( + parent: &[u8], + left: &[u8], + right: &[u8], + tokenizer: BuiltinTokenizer, +) -> Vec { set_panic_hook(); - if is_binary(parent) || is_binary(left) || is_binary(right) { + if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) { right.to_vec() } else { - reconcile( + crate::reconcile( str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"), - str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary"), - str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary"), + &str::from_utf8(left) + .expect("left must be valid UTF-8 because it's not binary") + .into(), + &str::from_utf8(right) + .expect("right must be valid UTF-8 because it's not binary") + .into(), + &*tokenizer, ) + .apply() + .text() .into_bytes() } } -/// WASM wrapper around `reconcile` for merging text. -#[wasm_bindgen(js_name = mergeText)] -#[must_use] -pub fn merge_text(parent: &str, left: &str, right: &str) -> String { - set_panic_hook(); - - reconcile(parent, left, right) -} - -/// WASM wrapper around `reconcile` for merging text. -#[wasm_bindgen(js_name = mergeTextWithHistory)] -#[must_use] -pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec { - set_panic_hook(); - - reconcile_with_history(parent, left, right) - .into_iter() - .collect() -} - -/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text. -#[wasm_bindgen(js_name = mergeTextWithCursors)] -#[must_use] -pub fn merge_text_with_cursors( - parent: &str, - left: &TextWithCursors, - right: &TextWithCursors, -) -> TextWithCursors { - set_panic_hook(); - - reconcile_with_cursors(parent, left, right) -} - /// Heuristically determine if the given data is a binary or a text file's /// content. 
#[wasm_bindgen(js_name = isBinary)] @@ -98,3 +108,22 @@ fn set_panic_hook() { #[cfg(feature = "console_error_panic_hook")] console_error_panic_hook::set_once(); } + +#[wasm_bindgen] +#[derive(Debug, Clone, PartialEq, Default)] +pub struct TextWithCursorsAndHistory { + text_with_cursors: TextWithCursors, + history: Vec, +} + +#[wasm_bindgen] +impl TextWithCursorsAndHistory { + #[must_use] + pub fn text(&self) -> String { self.text_with_cursors.text() } + + #[must_use] + pub fn cursors(&self) -> Vec { self.text_with_cursors.cursors() } + + #[must_use] + pub fn history(&self) -> Vec { self.history.clone() } +} diff --git a/tests/example_document.rs b/tests/example_document.rs index 08e39a2..ec9d79a 100644 --- a/tests/example_document.rs +++ b/tests/example_document.rs @@ -1,5 +1,5 @@ use pretty_assertions::assert_eq; -use reconcile::{CursorPosition, TextWithCursors}; +use reconcile::{CursorPosition, EditedText, TextWithCursors}; use serde::Deserialize; /// `ExampleDocument` represents a test case for the reconciliation process. @@ -37,7 +37,7 @@ impl ExampleDocument { /// /// If the result string does not match the expected string, the program /// will panic. 
- pub fn assert_eq(&self, result: &TextWithCursors) { + pub fn assert_eq(&self, result: &EditedText<'_, String>) { let result_str = ExampleDocument::text_with_cursors_to_string(result); assert_eq!( self.expected, result_str, @@ -60,14 +60,16 @@ impl ExampleDocument { ); } - fn text_with_cursors_to_string(document: &TextWithCursors) -> String { - let mut result = document.text().clone(); - for (i, cursor) in document.cursors().iter().enumerate() { + fn text_with_cursors_to_string(document: &EditedText<'_, String>) -> String { + let merged = document.apply(); + let mut result = merged.text(); + for (i, cursor) in merged.cursors().iter().enumerate() { assert!( cursor.char_index <= result.len(), // equals in case of insert at the end - "Cursor index out of bounds: {} > {} when testing for '{result}'", + "Cursor index out of bounds: {} > {} when testing for '{}.'", cursor.char_index, - result.len() + result.len(), + result ); result.insert( @@ -85,7 +87,7 @@ impl ExampleDocument { fn string_to_text_with_cursors(text: &str) -> TextWithCursors { let cursors = Self::parse_cursors(text); let text = text.replace('|', ""); - TextWithCursors::new_owned(text, cursors) + TextWithCursors::new(text, cursors) } fn parse_cursors(text: &str) -> Vec { diff --git a/tests/test.rs b/tests/test.rs index 30d39bb..7c6fe88 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -3,27 +3,33 @@ mod example_document; use std::{fs, path::Path}; use example_document::ExampleDocument; -use reconcile::{reconcile, reconcile_with_cursors}; +use reconcile::{BuiltinTokenizer, reconcile}; use serde::Deserialize; #[test] fn test_document_one_way_without_cursors() { for doc in &get_all_documents() { - doc.assert_eq_without_cursors(&reconcile( - &doc.parent(), - &doc.left().text(), - &doc.right().text(), - )); + doc.assert_eq_without_cursors( + &reconcile( + &doc.parent(), + &doc.left().text().into(), + &doc.right().text().into(), + &*BuiltinTokenizer::Word, + ) + .apply() + .text(), + ); } } #[test] fn 
test_document_one_way_with_cursors() { for doc in &get_all_documents() { - doc.assert_eq(&reconcile_with_cursors( + doc.assert_eq(&reconcile( &doc.parent(), &doc.left(), &doc.right(), + &*BuiltinTokenizer::Word, )); } } @@ -31,21 +37,27 @@ fn test_document_one_way_with_cursors() { #[test] fn test_document_inverse_way_without_cursors() { for doc in &get_all_documents() { - doc.assert_eq_without_cursors(&reconcile( - &doc.parent(), - &doc.right().text(), - &doc.left().text(), - )); + doc.assert_eq_without_cursors( + &reconcile( + &doc.parent(), + &doc.right().text().into(), + &doc.left().text().into(), + &*BuiltinTokenizer::Word, + ) + .apply() + .text(), + ); } } #[test] fn test_document_inverse_way_with_cursors() { for doc in &get_all_documents() { - doc.assert_eq(&reconcile_with_cursors( + doc.assert_eq(&reconcile( &doc.parent(), &doc.right(), &doc.left(), + &*BuiltinTokenizer::Word, )); } } diff --git a/tests/wasm.rs b/tests/wasm.rs index d081b28..ee584f5 100644 --- a/tests/wasm.rs +++ b/tests/wasm.rs @@ -1,18 +1,18 @@ #![cfg(feature = "wasm")] -use reconcile::{CursorPosition, TextWithCursors, wasm::*}; +use reconcile::{BuiltinTokenizer, CursorPosition, TextWithCursors, wasm::*}; use wasm_bindgen_test::*; #[wasm_bindgen_test(unsupported = test)] fn test_merge() { let left = b"hello "; let right = b"world"; - let result = merge(b"", left, right); + let result = generic_reconcile(b"", left, right, BuiltinTokenizer::Word); assert_eq!(result, b"hello world"); let left = b"\0binary"; let right = b"other"; - let result = merge(b"", left, right); + let result = generic_reconcile(b"", left, right, BuiltinTokenizer::Word); assert_eq!(result, right); } @@ -20,19 +20,20 @@ fn test_merge() { fn test_merge_text() { let left = "hello "; let right = "world"; - let result = merge_text("", left, right); + let result = reconcile("", &left.into(), &right.into(), BuiltinTokenizer::Word).text(); assert_eq!(result, "hello world"); } #[wasm_bindgen_test(unsupported = test)] fn 
test_merge_text_with_cursors() { - let result = merge_text_with_cursors( + let result = reconcile( "hi", &TextWithCursors::new("hi world".to_owned(), vec![]), &TextWithCursors::new( "hi".to_owned(), vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)], ), + BuiltinTokenizer::Word, ); assert_eq!( @@ -48,7 +49,10 @@ fn test_merge_text_with_cursors() { fn merge_binary() { let left = [0, 1, 2]; let right = [3, 4, 5]; - assert_eq!(merge(b"", &left, &right), right); + assert_eq!( + generic_reconcile(b"", &left, &right, BuiltinTokenizer::Word), + right + ); } #[wasm_bindgen_test(unsupported = test)]