diff --git a/Cargo.lock b/Cargo.lock index bf51cb7..83a551f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -124,6 +124,12 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + [[package]] name = "memory_units" version = "0.4.0" @@ -182,6 +188,7 @@ dependencies = [ "insta", "pretty_assertions", "serde", + "serde_json", "serde_yaml", "test-case", "wasm-bindgen", @@ -212,24 +219,47 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" diff --git 
a/Cargo.toml b/Cargo.toml index 8ee40fa..0d625a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ path = "examples/merge-file.rs" serde = { version = "1.0.219", optional = true, features = ["derive"] } wasm-bindgen = { version = "0.2.99", optional = true } +serde_json = { version = "1.0.145", optional = true } # The `console_error_panic_hook` crate provides better debugging of panics by # logging them with `console.error`. This is great for development, but requires @@ -36,8 +37,9 @@ wee_alloc = { version = "0.4.2", optional = true } [features] default = [] serde = [ "dep:serde" ] -wasm = [ "dep:wasm-bindgen", "dep:wee_alloc" ] +wasm = [ "dep:wasm-bindgen", "dep:wee_alloc", "dep:serde_json", "serde" ] console_error_panic_hook = [ "dep:console_error_panic_hook" ] +all = [ "wasm", "console_error_panic_hook" ] [dev-dependencies] insta = "1.42.2" diff --git a/README.md b/README.md index c3675c5..a5e5a46 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ A Rust and TypeScript library for merging conflicting text edits without manual ### Rust Install via crates.io: + ```sh cargo add reconcile-text ``` @@ -97,7 +98,7 @@ Differential sync is implemented by [universal-sync](https://github.com/invisibl 3. **Diff optimisation** — Operations are reordered and consolidated to maximise chained changes 4. **Operational Transformation** — Edits are woven together using OT principles, preserving all modifications and updating cursors -Whilst the primary goal of `reconcile-text` isn't to implement OT, it provides an elegant way to merge Myers' diff outputs. (For a dedicated Rust OT implementation, see [operational-transform-rs](https://github.com/spebern/operational-transform-rs).) The same could be achieved with CRDTs, which many libraries implement well for text—see [Loro](https://github.com/loro-dev/loro/), [cola](https://github.com/nomad/cola), and [automerge](https://github.com/automerge/automerge) as excellent examples. 
+Whilst the primary goal of `reconcile-text` isn't to implement OT, it provides an elegant way to merge Myers' diff outputs. (For a dedicated Rust OT implementation, see [operational-transform-rs](https://github.com/spebern/operational-transform-rs).) The same could be achieved with CRDTs, which many libraries implement well for text—see [Loro](https://github.com/loro-dev/loro/), [cola](https://github.com/nomad/cola), and [automerge](https://github.com/automerge/automerge) as excellent examples. However, when only the end result of concurrent changes is observable, merge quality depends entirely on the quality of the underlying 2-way diffs. For instance, `move` operations cannot be supported because Myers' algorithm decomposes them into separate `insert` and `delete` operations, regardless of the merging algorithm used. @@ -146,7 +147,7 @@ Contributions are welcome! [MIT](./LICENSE) [1]:https://marijnhaverbeke.nl/blog/collaborative-editing-cm.html -[2]: https://neil.fraser.name/writing/sync/ +[2]: https://neil.fraser.name/writing/sync/ [3]: https://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf [4]: https://blog.jcoglan.com/2017/05/08/merging-with-diff3/ [5]: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/35605.pdf diff --git a/reconcile-js/package-lock.json b/reconcile-js/package-lock.json index 8647e28..b213742 100644 --- a/reconcile-js/package-lock.json +++ b/reconcile-js/package-lock.json @@ -24,7 +24,7 @@ }, "../pkg": { "name": "reconcile-text", - "version": "0.4.10", + "version": "0.5.0", "dev": true, "license": "MIT" }, diff --git a/reconcile-js/src/index.ts b/reconcile-js/src/index.ts index efc18ae..247db26 100644 --- a/reconcile-js/src/index.ts +++ b/reconcile-js/src/index.ts @@ -5,6 +5,7 @@ import { SpanWithHistory as wasmSpanWithHistory, reconcileWithHistory as wasmReconcileWithHistory, isBinary as wasmIsBinary, + getCompactDiff as wasmGetCompactDiff, initSync, } from 'reconcile-text'; @@ -179,6 +180,40 @@ export 
function reconcile( return jsResult; } +/** + * Generates a compact diff representation between an original and changed text. + * + * The result can be parsed and unpacked using the Rust crate's `EditedText::from_change_set`. + * + * This function computes the differences between two versions of text and returns + * a compact string representation of those changes. The returned format is + * serialised JSON. + * + * @param original - The original/base version of the text + * @param changed - The modified version of the text (either a string or a TextWithOptionalCursors carrying cursor positions) + * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`. + * @returns A compact string representation of the diff between original and changed text + */ +export function getCompactDiff( + original: string, + changed: string | TextWithOptionalCursors, + tokenizer: BuiltinTokenizer = 'Word' +): string { + init(); + + if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { + throw new Error(UNSUPPORTED_TOKENIZER_ERROR); + } + + const changedWasm = toWasmTextWithCursors(changed); + + const result = wasmGetCompactDiff(original, changedWasm, tokenizer); + + changedWasm.free(); + + return result; +} + /** * Merges three versions of text and returns detailed provenance information. * diff --git a/scripts/test.sh b/scripts/test.sh index 7089ee8..7eb816c 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -3,9 +3,8 @@ set -e wasm-pack build --target web --features wasm -cargo test --verbose -- --include-ignored -cargo test --features serde -cargo test --features wasm +cargo test --verbose --features serde -- --include-ignored +cargo test --features serde,wasm wasm-pack test --node --features wasm cd reconcile-js diff --git a/src/lib.rs b/src/lib.rs index f745c17..1dd78ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -151,6 +151,48 @@ //! ] //! ); //! ``` +//! ## Efficiently serialize changes +//! +//! The edits can be serialized into a compact representation without the full +//! 
original text, so the serialized size depends only on the changes made. +//! +//! ```rust +//! use reconcile_text::{EditedText, BuiltinTokenizer}; +//! use serde_yaml; +//! use pretty_assertions::assert_eq; +//! +//! +//! let original = "Merging text is hard!"; +//! let changes = "Merging text is easy with reconcile!"; +//! +//! let result = EditedText::from_strings( +//! original, +//! &changes.into() +//! ); +//! +//! let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap(); +//! assert_eq!( +//! serialized, +//! concat!( +//! "operations:\n", +//! "- 15\n", +//! "- -6\n", +//! "- ' easy with reconcile!'\n", +//! "cursors: []\n" +//! ) +//! ); +//! +//! let deserialized = serde_yaml::from_str(&serialized).unwrap(); +//! let reconstructed = EditedText::from_change_set( +//! original, +//! deserialized, +//! &*BuiltinTokenizer::Word +//! ); +//! assert_eq!( +//! reconstructed.apply().text(), +//! "Merging text is easy with reconcile!" +//! ); +//! ``` //! //! ## Error handling //! 
@@ -169,7 +211,7 @@ mod tokenizer; mod types; mod utils; -pub use operation_transformation::{EditedText, reconcile}; +pub use operation_transformation::{ChangeSet, EditedText, reconcile}; pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token}; pub use types::{ cursor_position::CursorPosition, history::History, side::Side, diff --git a/src/operation_transformation.rs b/src/operation_transformation.rs index 10bda6d..e1f173a 100644 --- a/src/operation_transformation.rs +++ b/src/operation_transformation.rs @@ -1,15 +1,14 @@ mod edited_text; mod operation; +mod transport; mod utils; use std::fmt::Debug; pub use edited_text::EditedText; pub use operation::Operation; +pub use transport::ChangeSet; -use crate::{ - Tokenizer, - types::{side::Side, text_with_cursors::TextWithCursors}, -}; +use crate::{Tokenizer, types::text_with_cursors::TextWithCursors}; /// Given an `original` document and two concurrent edits to it, /// return a document containing all changes from both `left` @@ -48,10 +47,8 @@ pub fn reconcile<'a, T>( where T: PartialEq + Clone + Debug, { - let left_operations = - EditedText::from_strings_with_tokenizer(original, left, tokenizer, Side::Left); - let right_operations = - EditedText::from_strings_with_tokenizer(original, right, tokenizer, Side::Right); + let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer); + let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer); left_operations.merge(right_operations) } diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs index 174cfaa..a6465fe 100644 --- a/src/operation_transformation/edited_text.rs +++ b/src/operation_transformation/edited_text.rs @@ -1,12 +1,13 @@ -use std::fmt::Debug; +use std::{fmt::Debug, vec}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use crate::{ - BuiltinTokenizer, CursorPosition, TextWithCursors, + BuiltinTokenizer, ChangeSet, CursorPosition, 
TextWithCursors, operation_transformation::{ Operation, + transport::SimpleOperation, utils::{cook_operations::cook_operations, elongate_operations::elongate_operations}, }, raw_operation::RawOperation, @@ -35,6 +36,7 @@ where { text: &'a str, operations: Vec>, + operation_sides: Vec, cursors: Vec, } @@ -46,8 +48,8 @@ impl<'a> EditedText<'a, String> { /// word tokenizer is used to tokenize the text which splits the text on /// whitespaces. #[must_use] - pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self { - Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word, side) + pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self { + Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word) } } @@ -64,16 +66,18 @@ where original: &'a str, updated: &TextWithCursors, tokenizer: &Tokenizer, - side: Side, ) -> Self { let original_tokens = (tokenizer)(original); let updated_tokens = (tokenizer)(&updated.text()); let diff: Vec> = RawOperation::vec_from(&original_tokens, &updated_tokens); + let operations: Vec> = cook_operations(elongate_operations(diff)).collect(); + let operation_count = operations.len(); Self::new( original, - cook_operations(elongate_operations(diff), side).collect(), + operations, + vec![Side::Left; operation_count], updated.cursors(), ) } @@ -81,12 +85,18 @@ where /// Create a new `EditedText` with the given operations. /// The operations must be in the order in which they are meant to be /// applied. The operations must not overlap. 
- fn new(text: &'a str, operations: Vec>, mut cursors: Vec) -> Self { + fn new( + text: &'a str, + operations: Vec>, + operation_sides: Vec, + mut cursors: Vec, + ) -> Self { cursors.sort_by_key(|cursor| cursor.char_index); Self { text, operations, + operation_sides, cursors, } } @@ -109,6 +119,8 @@ where let mut merged_operations: Vec> = Vec::with_capacity(self.operations.len() + other.operations.len()); + let mut merged_operation_sides: Vec = + Vec::with_capacity(self.operations.len() + other.operations.len()); let mut left_iter = self.operations.into_iter(); let mut right_iter = other.operations.into_iter(); @@ -149,7 +161,7 @@ where ); let original_length = operation.len(); - let result = match side { + let (side, result) = match side { Side::Left => { let result = operation.merge_operations(&mut last_other_op); @@ -181,7 +193,7 @@ where maybe_left_op = left_iter.next(); last_left_op = Some(result.clone()); - result + (Side::Left, result) } Side::Right => { let result = operation.merge_operations(&mut last_other_op); @@ -214,7 +226,7 @@ where maybe_right_op = right_iter.next(); last_right_op = Some(result.clone()); - result + (Side::Right, result) } }; @@ -227,13 +239,21 @@ where } merged_operations.push(result); + merged_operation_sides.push(side); } for cursor in left_cursors.chain(right_cursors) { merged_cursors.push(cursor.with_index(merged_length)); } - Self::new(self.text, merged_operations, merged_cursors) + debug_assert_eq!(merged_operations.len(), merged_operation_sides.len()); + + Self::new( + self.text, + merged_operations, + merged_operation_sides, + merged_cursors, + ) } /// Apply the operations to the text and return the resulting text. @@ -288,14 +308,14 @@ where let mut history = Vec::with_capacity(self.operations.len()); - for operation in &self.operations { + for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) { builder = operation.apply(builder); match operation { Operation::Equal { .. 
} => { history.push(SpanWithHistory::new(builder.take(), History::Unchanged)); } - Operation::Insert { side, .. } => match side { + Operation::Insert { .. } => match side { Side::Left => { history.push(SpanWithHistory::new(builder.take(), History::AddedFromLeft)); } @@ -307,7 +327,6 @@ where Operation::Delete { deleted_character_count, order, - side, .. } => { let deleted = self.text[*order..*order + *deleted_character_count].to_string(); @@ -325,6 +344,37 @@ where history } + + /// Serialize the `EditedText` as a `ChangeSet`, which contains only + /// the operations and cursor positions, but without the original text. + /// This is useful for sending changes over the network if there's + /// a clear consensus on the original text. + #[must_use] + pub fn to_change_set(&self) -> ChangeSet { + ChangeSet::new( + SimpleOperation::from_operations(&self.operations), + self.cursors.clone(), + ) + } + + /// Deserialize an `EditedText` from a `ChangeSet` and the original text. + /// This is useful for reconstructing the `EditedText` on the receiving + /// end after sending only the `ChangeSet` over the network. + #[must_use] + pub fn from_change_set( + text: &'a str, + change_set: ChangeSet, + tokenizer: &Tokenizer, + ) -> EditedText<'a, T> { + let operations = SimpleOperation::to_operations(change_set.operations, text, tokenizer); + let operation_count = operations.len(); + EditedText::new( + text, + operations, + vec![Side::Left; operation_count], + change_set.cursors, + ) + } } #[cfg(test)] @@ -339,7 +389,7 @@ mod tests { let left = "hello world! How are you? Adam"; let right = "Hello, my friend! How are you doing? 
Albert"; - let operations = EditedText::from_strings(left, &right.into(), Side::Right); + let operations = EditedText::from_strings(left, &right.into()); insta::assert_debug_snapshot!(operations); @@ -351,7 +401,7 @@ mod tests { fn test_calculate_operations_with_no_diff() { let text = "hello world!"; - let operations = EditedText::from_strings(text, &text.into(), Side::Right); + let operations = EditedText::from_strings(text, &text.into()); assert_debug_snapshot!(operations); @@ -366,10 +416,42 @@ mod tests { let right = "Hello world! How are you?"; let expected = "Hello world! How are you? I'm Andras."; - let operations_1 = EditedText::from_strings(original, &left.into(), Side::Left); - let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right); + let operations_1 = EditedText::from_strings(original, &left.into()); + let operations_2 = EditedText::from_strings(original, &right.into()); let operations = operations_1.merge(operations_2); assert_eq!(operations.apply().text(), expected); } + + #[test] + fn test_change_set_deserialisation() { + let original = "Merging text is hard!"; + let changes = "Merging text is easy with reconcile!"; + let result = EditedText::from_strings(original, &changes.into()); + let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap(); + + let expected = concat!( + "operations:\n", + "- 15\n", + "- -6\n", + "- ' easy with reconcile!'\n", + "cursors: []\n" + ); + + assert_eq!(serialized, expected); + } + + #[test] + fn test_change_set_serialization() { + let original = "The quick brown fox jumps over the lazy dog."; + let updated = "The quick red fox jumped over the very lazy dog!"; + + let edited_text = EditedText::from_strings(original, &updated.into()); + + let change_set = edited_text.to_change_set(); + let deserialized_edited_text = + EditedText::from_change_set(original, change_set, &*BuiltinTokenizer::Word); + + assert_eq!(deserialized_edited_text.apply().text(), updated); + } } diff --git 
a/src/operation_transformation/operation.rs b/src/operation_transformation/operation.rs index 1c3060c..7a8f92a 100644 --- a/src/operation_transformation/operation.rs +++ b/src/operation_transformation/operation.rs @@ -4,7 +4,7 @@ use core::fmt::{Debug, Display}; use serde::{Deserialize, Serialize}; use crate::{ - Side, Token, + Token, utils::{ find_longest_prefix_contained_within::find_longest_prefix_contained_within, string_builder::StringBuilder, @@ -23,23 +23,21 @@ where length: usize, #[cfg(debug_assertions)] + #[cfg_attr(feature = "serde", serde(skip_serializing))] text: Option, }, Insert { - side: Side, - order: usize, text: Vec>, }, Delete { - side: Side, - order: usize, deleted_character_count: usize, #[cfg(debug_assertions)] + #[cfg_attr(feature = "serde", serde(skip_serializing))] deleted_text: Option, }, } @@ -72,15 +70,14 @@ where } /// Creates an insert operation with the given index and text. - pub fn create_insert(order: usize, text: Vec>, side: Side) -> Self { - Operation::Insert { side, order, text } + pub fn create_insert(order: usize, text: Vec>) -> Self { + Operation::Insert { order, text } } /// Creates a delete operation with the given index and number of /// to-be-deleted characters. - pub fn create_delete(order: usize, deleted_character_count: usize, side: Side) -> Self { + pub fn create_delete(order: usize, deleted_character_count: usize) -> Self { Operation::Delete { - side, order, deleted_character_count, @@ -89,9 +86,8 @@ where } } - pub fn create_delete_with_text(order: usize, text: String, side: Side) -> Self { + pub fn create_delete_with_text(order: usize, text: String) -> Self { Operation::Delete { - side, order, deleted_character_count: text.chars().count(), @@ -206,7 +202,7 @@ where match (operation, previous_operation) { ( - Operation::Insert { side, order, text }, + Operation::Insert { order, text }, Some(Operation::Insert { text: previous_inserted_text, .. 
@@ -218,12 +214,11 @@ where let offset_in_tokens = find_longest_prefix_contained_within(previous_inserted_text, &text); - Operation::create_insert(order, text[offset_in_tokens..].to_vec(), side) + Operation::create_insert(order, text[offset_in_tokens..].to_vec()) } ( Operation::Delete { - side, order, deleted_character_count, @@ -247,20 +242,19 @@ where #[cfg(debug_assertions)] let updated_delete = deleted_text.as_ref().map_or_else( - || Operation::create_delete(order + overlap, new_length, side), + || Operation::create_delete(order + overlap, new_length), |text| { Operation::create_delete_with_text( order + overlap, text.chars() .skip(deleted_character_count - new_length) .collect::(), - side, ) }, ); #[cfg(not(debug_assertions))] - let updated_delete = Operation::create_delete(order + overlap, new_length, side); + let updated_delete = Operation::create_delete(order + overlap, new_length); updated_delete } @@ -405,8 +399,7 @@ mod tests { #[test] fn test_apply_delete_with_create() { let builder = StringBuilder::new("hello world"); - let delete_operation = - Operation::<()>::create_delete_with_text(0, "hello ".to_owned(), Side::Left); + let delete_operation = Operation::<()>::create_delete_with_text(0, "hello ".to_owned()); let retain_operation = Operation::<()>::create_equal(6, 5); let mut builder = delete_operation.apply(builder); @@ -420,7 +413,7 @@ mod tests { let builder = StringBuilder::new("hello"); let retain_operation = Operation::<()>::create_equal(0, 5); - let insert_operation = Operation::create_insert(5, vec![" my friend".into()], Side::Right); + let insert_operation = Operation::create_insert(5, vec![" my friend".into()]); let mut builder = retain_operation.apply(builder); builder = insert_operation.apply(builder); diff --git a/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations.snap 
b/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations.snap index abbabbd..0096f0e 100644 --- a/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations.snap @@ -1,7 +1,6 @@ --- source: src/operation_transformation/edited_text.rs expression: operations -snapshot_kind: text --- EditedText { text: "hello world! How are you? Adam", @@ -15,5 +14,15 @@ EditedText { , , ], + operation_sides: [ + Left, + Left, + Left, + Left, + Left, + Left, + Left, + Left, + ], cursors: [], } diff --git a/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap b/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap index 275a552..cf6a674 100644 --- a/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap +++ b/src/operation_transformation/snapshots/reconcile_text__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap @@ -1,7 +1,6 @@ --- source: src/operation_transformation/edited_text.rs expression: operations -snapshot_kind: text --- EditedText { text: "hello world!", @@ -10,5 +9,10 @@ EditedText { , , ], + operation_sides: [ + Left, + Left, + Left, + ], cursors: [], } diff --git a/src/operation_transformation/transport.rs b/src/operation_transformation/transport.rs new file mode 100644 index 0000000..67c25e5 --- /dev/null +++ b/src/operation_transformation/transport.rs @@ -0,0 +1,204 @@ +use std::fmt::Debug; + +#[cfg(feature = "serde")] +use serde::{ + Deserialize, Serialize, + de::{self, Deserializer, Visitor}, + ser::Serializer, +}; + +use 
crate::{CursorPosition, Tokenizer, operation_transformation::Operation}; + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum SimpleOperation { + Equal { length: usize }, + Insert { text: String }, + Delete { length: usize }, +} + +impl SimpleOperation { + pub fn from_operations(operation: &Vec>) -> Vec + where + T: PartialEq + Clone + Debug, + { + let mut result: Vec = Vec::with_capacity(operation.len()); + let mut previous_equal: Option = None; + + for operation in operation { + match operation { + Operation::Equal { length, .. } => { + if let Some(prev_length) = previous_equal { + previous_equal = Some(prev_length + *length); + } else { + previous_equal = Some(*length); + } + } + + Operation::Insert { text, .. } => { + if let Some(prev_length) = previous_equal { + result.push(SimpleOperation::Equal { + length: prev_length, + }); + previous_equal = None; + } + + let text: String = text + .iter() + .map(super::super::tokenizer::token::Token::original) + .collect(); + result.push(SimpleOperation::Insert { text }); + } + + Operation::Delete { + deleted_character_count, + .. 
+ } => { + if let Some(prev_length) = previous_equal { + result.push(SimpleOperation::Equal { + length: prev_length, + }); + previous_equal = None; + } + + result.push(SimpleOperation::Delete { + length: *deleted_character_count, + }); + } + } + } + + if let Some(prev_length) = previous_equal { + result.push(SimpleOperation::Equal { + length: prev_length, + }); + } + + result + } + + // This is similar to `crate::operation_transformation::utils::cook_operations` + pub fn to_operations( + simple_operations: Vec, + original_text: &str, + tokenizer: &Tokenizer, + ) -> Vec> + where + T: PartialEq + Clone + Debug, + { + let mut operations: Vec> = Vec::with_capacity(simple_operations.len()); + let mut order = 0; + + for simple_operation in simple_operations { + match simple_operation { + SimpleOperation::Equal { length } => { + let original_characters: String = + original_text.chars().skip(order).take(length).collect(); + + let original_tokens = tokenizer(&original_characters); + for token in original_tokens { + operations + .push(Operation::create_equal(order, token.get_original_length())); + order += token.get_original_length(); + } + } + + SimpleOperation::Insert { text } => { + let tokens = tokenizer(&text); + operations.push(Operation::create_insert(order, tokens)); + } + + SimpleOperation::Delete { length } => { + operations.push(Operation::create_delete(order, length)); + order += length; + } + } + } + + operations + } +} + +#[cfg(feature = "serde")] +impl Serialize for SimpleOperation { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // neat idea from https://github.com/spebern/operational-transform-rs/blob/9faa17f0a2b282ac2e09dbb2d29fdaf2ae0bbb4a/operational-transform/src/serde.rs#L14 + match self { + SimpleOperation::Equal { length } => serializer.serialize_u64(*length as u64), + SimpleOperation::Insert { text } => serializer.serialize_str(text), + SimpleOperation::Delete { length } => { + 
serializer.serialize_i64(-(i64::try_from(*length).unwrap_or(i64::MAX))) + } + } + } +} + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for SimpleOperation { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + use std::fmt; + + struct OperationVisitor; + + impl Visitor<'_> for OperationVisitor { + type Value = SimpleOperation; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("an integer between -2^63 and 2^64-1 or a string") + } + + fn visit_u64(self, value: u64) -> Result + where + E: de::Error, + { + Ok(SimpleOperation::Equal { + length: usize::try_from(value).unwrap_or(usize::MAX), + }) + } + + fn visit_i64(self, value: i64) -> Result + where + E: de::Error, + { + Ok(SimpleOperation::Delete { + length: usize::try_from(-value).unwrap_or(usize::MAX), + }) + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + Ok(SimpleOperation::Insert { + text: value.to_owned(), + }) + } + } + + deserializer.deserialize_any(OperationVisitor) + } +} + +/// A serializable representation of the changes made to a text document +/// without the original text. 
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Default)] +pub struct ChangeSet { + pub operations: Vec, + pub cursors: Vec, +} + +impl ChangeSet { + #[must_use] + pub fn new(operations: Vec, cursors: Vec) -> Self { + Self { + operations, + cursors, + } + } +} diff --git a/src/operation_transformation/utils/cook_operations.rs b/src/operation_transformation/utils/cook_operations.rs index 0b188cc..2f1d0ac 100644 --- a/src/operation_transformation/utils/cook_operations.rs +++ b/src/operation_transformation/utils/cook_operations.rs @@ -1,10 +1,10 @@ use std::fmt::Debug; -use crate::{operation_transformation::Operation, raw_operation::RawOperation, types::side::Side}; +use crate::{operation_transformation::Operation, raw_operation::RawOperation}; /// Turn raw operations into ordered operations while keeping track of the /// original token's indexes. -pub fn cook_operations(raw_operations: I, side: Side) -> impl Iterator> +pub fn cook_operations(raw_operations: I) -> impl Iterator> where I: IntoIterator>, T: PartialEq + Clone + Debug, @@ -29,18 +29,15 @@ where op } - RawOperation::Insert(tokens) => { - Operation::create_insert(original_text_index, tokens, side) - } + RawOperation::Insert(tokens) => Operation::create_insert(original_text_index, tokens), RawOperation::Delete(..) 
=> { let op = if cfg!(debug_assertions) { Operation::create_delete_with_text( original_text_index, raw_operation.get_original_text(), - side, ) } else { - Operation::create_delete(original_text_index, length, side) + Operation::create_delete(original_text_index, length) }; original_text_index += length; diff --git a/src/utils/string_builder.rs b/src/utils/string_builder.rs index d77974a..34110d8 100644 --- a/src/utils/string_builder.rs +++ b/src/utils/string_builder.rs @@ -1,4 +1,4 @@ -use std::iter::Iterator; +use std::{fmt, iter::Iterator}; /// A helper for building a string in-order based on an original string and a /// series of insertions, deletions, and copies applied to it. It is safe to use @@ -12,6 +12,18 @@ pub struct StringBuilder<'a> { remaining: String, } +impl fmt::Debug for StringBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut debug_struct = f.debug_struct("StringBuilder"); + debug_struct.field("buffer", &self.buffer); + + #[cfg(debug_assertions)] + debug_struct.field("remaining", &self.remaining); + + debug_struct.finish_non_exhaustive() + } +} + impl StringBuilder<'_> { pub fn new(original: &str) -> StringBuilder<'_> { StringBuilder { diff --git a/src/wasm.rs b/src/wasm.rs index 8cf080a..0fd0aca 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -87,6 +87,25 @@ pub fn generic_reconcile( } } +/// WASM wrapper around getting a compact diff representation as a JSON string +/// +/// # Panics +/// +/// If serialization to JSON fails which should not happen +#[wasm_bindgen(js_name = getCompactDiff)] +#[must_use] +pub fn get_compact_diff( + parent: &str, + changed: &TextWithCursors, + tokenizer: BuiltinTokenizer, +) -> String { + set_panic_hook(); + let edited_text = crate::EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer); + let change_set = edited_text.to_change_set(); + + serde_json::to_string(&change_set).expect("Failed to serialize change set") +} + /// Heuristically determine if the given data is a 
binary or a text file's /// content. #[wasm_bindgen(js_name = isBinary)] diff --git a/tests/test.rs b/tests/test.rs index e9968b9..e8fae7d 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -3,7 +3,7 @@ mod example_document; use std::{fs, path::Path}; use example_document::ExampleDocument; -use reconcile_text::{BuiltinTokenizer, reconcile}; +use reconcile_text::{BuiltinTokenizer, EditedText, reconcile}; use serde::Deserialize; #[test] @@ -34,6 +34,35 @@ fn test_document_one_way_with_cursors() { } } +#[test] +fn test_document_one_way_with_cursors_and_serialisation() { + for doc in &get_all_documents() { + let parent = doc.parent(); + let left_operations = + EditedText::from_strings_with_tokenizer(&parent, &doc.left(), &*BuiltinTokenizer::Word); + let right_operations = EditedText::from_strings_with_tokenizer( + &parent, + &doc.right(), + &*BuiltinTokenizer::Word, + ); + + let serialised_left = + serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_change_set()).unwrap()) + .unwrap(); + let serialised_right = serde_yaml::from_str( + &serde_yaml::to_string(&right_operations.to_change_set()).unwrap(), + ) + .unwrap(); + + let restored_left_operations = + EditedText::from_change_set(&parent, serialised_left, &*BuiltinTokenizer::Word); + let restored_right_operations = + EditedText::from_change_set(&parent, serialised_right, &*BuiltinTokenizer::Word); + + doc.assert_eq(&restored_left_operations.merge(restored_right_operations)); + } +} + #[test] fn test_document_inverse_way_without_cursors() { for doc in &get_all_documents() { diff --git a/tests/wasm.rs b/tests/wasm.rs index 5ec5f35..6a9d556 100644 --- a/tests/wasm.rs +++ b/tests/wasm.rs @@ -46,7 +46,7 @@ fn test_merge_text_with_cursors() { } #[wasm_bindgen_test(unsupported = test)] -fn merge_binary() { +fn test_merge_binary() { let left = [0, 1, 2]; let right = [3, 4, 5]; assert_eq!( @@ -62,6 +62,14 @@ fn test_is_binary() { assert!(!is_binary(b"hello")); } +#[wasm_bindgen_test(unsupported = test)] +fn 
test_get_compact_diff() { + let parent = "hello "; + let changed = "world"; + let result = get_compact_diff(parent, &changed.into(), BuiltinTokenizer::Word); + assert_eq!(result, "{\"operations\":[-6,\"world\"],\"cursors\":[]}"); +} + #[wasm_bindgen_test(unsupported = test)] fn test_is_binary_empty() { assert!(!is_binary(b""));