diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs new file mode 100644 index 0000000..3decbac --- /dev/null +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -0,0 +1,279 @@ +use std::borrow::BorrowMut; + +use super::{operation, Operation}; +use crate::diffs::raw_operation::RawOperation; +use crate::errors::SyncLibError; +use crate::operation_transformation::merge_context::MergeContext; +use crate::tokenizer::token::Token; +use crate::utils::ordered_operation::OrderedOperation; +use crate::utils::side::Side; +use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted}; +use ropey::Rope; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +/// A sequence of operations that can be applied to a text document. +/// EditedText supports merging two sequences of operations using the +/// principle of Operational Transformation. +/// +/// It's mainly created through the from_strings method, then merged with another +/// EditedText derived from the same original text and then applied to the original text +/// to get the reconciled text of concurrent edits. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct EditedText<'a> { + text: &'a str, + operations: Vec, +} + +impl<'a> EditedText<'a> { + /// Create an EditedText from the given original (old) and updated (new) strings. + /// The returned EditedText represents the changes from the original to the updated text. + /// When the return value is applied to the original text, it will result in the updated text. 
+ pub fn from_strings(original: &'a str, updated: &str) -> Self { + let original_tokens = Token::tokenize(original); + let updated_tokens = Token::tokenize(updated); + + let diff: Vec = diff(&original_tokens, &updated_tokens); + + Self::new( + original, + Self::elongate_operations(Self::cook_operations(diff)), + ) + } + + // Turn raw operations into ordered operations while keeping track of old & new indexes. + fn cook_operations(raw_operations: Vec) -> Vec { + let mut new_index = 0; // this is the start index of the operation on the new text + let mut order = 0; // this is the start index of the operation on the original text + + raw_operations + .into_iter() + .flat_map(|raw_operation| { + let length = raw_operation.original_text_length(); + + let operation = match raw_operation { + RawOperation::Equal(..) => { + new_index += length; + order += length; + + None + } + RawOperation::Insert(..) => { + let op = + Operation::create_insert(new_index, raw_operation.get_original_text()) + .map(|operation| OrderedOperation { order, operation }); + + new_index += length; + + op + } + RawOperation::Delete(..) => { + let op = if cfg!(debug_assertions) { + Operation::create_delete_with_text( + new_index, + raw_operation.get_original_text(), + ) + } else { + Operation::create_delete(new_index, length) + } + .map(|operation| OrderedOperation { order, operation }); + + order += length; + + op + } + }; + + operation.into_iter() + }) + .collect() + } + + // TODO: shift ops before compacting + fn elongate_operations(operations: Vec) -> Vec { + let mut maybe_previous: Option = None; + + let mut result: Vec = operations + .into_iter() + .flat_map(|next| { + if let Some(previous) = maybe_previous.take() { + match (previous, next) { + ( + previous @ OrderedOperation { + operation: Operation::Insert { .. }, + .. + }, + next @ OrderedOperation { + operation: Operation::Insert { .. }, + ..
+ }, + ) if previous.operation.end_index() + 1 == next.operation.start_index() => { + maybe_previous = Some(OrderedOperation { + order: previous.order, + operation: previous.operation.extend(&next.operation), + }); + None + } + ( + previous @ OrderedOperation { + operation: Operation::Delete { .. }, + .. + }, + next @ OrderedOperation { + operation: Operation::Delete { .. }, + .. + }, + ) if previous.operation.start_index() == next.operation.start_index() => { + maybe_previous = Some(OrderedOperation { + order: previous.order, + operation: previous.operation.extend(&next.operation), + }); + None + } + (previous, next) => { + maybe_previous = Some(next); + Some(previous) + } + } + } else { + maybe_previous = Some(next.clone()); + None + } + .into_iter() + }) + .collect(); + + if let Some(prev) = maybe_previous { + result.push(prev); + } + + result + } + + /// Create a new EditedText with the given operations. + /// The operations must be in the order in which they are meant to be applied. + /// The operations must not overlap. 
+ fn new(text: &'a str, operations: Vec) -> Self { + operations + .iter() + .zip(operations.iter().skip(1)) + .for_each(|(previous, next)| { + debug_assert!( + previous.operation.start_index() <= next.operation.start_index(), + "{} must not come before {} yet it does", + previous.operation, + next.operation + ); + }); + + Self { text, operations } + } + + pub fn merge(self, other: Self) -> Self { + debug_assert_eq!( + self.text, other.text, + "EditedTexts must be derived from the same text to be mergable" + ); + + let mut left_merge_context = MergeContext::default(); + let mut right_merge_context = MergeContext::default(); + + Self::new( + self.text, + self.operations + .into_iter() + .map(|op| (op, Side::Left)) + .merge_sorted_by_key( + other.operations.into_iter().map(|op| (op, Side::Right)), + |(operation, _)| operation.order, + ) + .flat_map(|(OrderedOperation { order, operation }, side)| { + match side { + Side::Left => operation.merge_operations_with_context( + &mut right_merge_context, + &mut left_merge_context, + ), + Side::Right => operation.merge_operations_with_context( + &mut left_merge_context, + &mut right_merge_context, + ), + } + .map(|operation| OrderedOperation { order, operation }) + .into_iter() + }) + .collect(), + ) + } + + /// Apply the operations to the text and return the resulting text. + /// + /// # Errors + /// + /// Returns an SyncLibError::OperationError if the operations cannot be applied to the text. + pub fn apply(&self) -> Result { + let mut text = Rope::from_str(self.text); + self.operations + .iter() + .try_fold( + &mut text, + |rope_text, OrderedOperation { operation, .. }| operation.apply(rope_text), + ) + .map(|rope| rope.to_string()) + } +} + +#[cfg(test)] +mod tests { + use std::{env, fs, ops::Range, path::Path}; + + use pretty_assertions::assert_eq; + use test_case::test_matrix; + + use super::*; + + #[test] + fn test_calculate_operations() { + let left = "hello world! How are you? Adam"; + let right = "Hello, my friend! 
How are you doing? Albert"; + + let operations = EditedText::from_strings(left, right); + + insta::assert_debug_snapshot!(operations); + + let new_right = operations.apply().unwrap(); + + assert_eq!(new_right.to_string(), right); + } + + #[test] + fn test_calculate_operations_with_no_diff() { + let text = "hello world!"; + + let operations = EditedText::from_strings(text, text); + + assert_eq!(operations.operations.len(), 0); + + let new_right = operations.apply().unwrap(); + + assert_eq!(new_right.to_string(), text); + } + + #[test] + fn test_calculate_operations_with_insert() { + let original = "hello world! ..."; + let left = "Hello world! How are you?"; + let right = "hello world! I'm Andras."; + let expected = "Hello world! I'm Andras. How are you?"; + + let operations_1 = EditedText::from_strings(original, left); + println!("{:#?}", operations_1); + let operations_2 = EditedText::from_strings(original, right); + println!("{:#?}", operations_2); + + let operations = operations_1.merge(operations_2); + + assert_eq!(operations.apply().unwrap(), expected); + } +} diff --git a/backend/reconcile/src/operation_transformation/merge_context.rs b/backend/reconcile/src/operation_transformation/merge_context.rs new file mode 100644 index 0000000..6a14105 --- /dev/null +++ b/backend/reconcile/src/operation_transformation/merge_context.rs @@ -0,0 +1,34 @@ +use crate::operation_transformation::{operation, Operation}; + +#[derive(Debug, Clone, Default)] +pub struct MergeContext { + pub last_delete: Option, + pub shift: i64, +} + +impl MergeContext { + /// Replace the last delete operation (if there was one) with a new one while + /// applying it to the shift. + pub fn replace_delete(&mut self, delete: Option) { + if let Some(produced_last_delete) = self.last_delete.take() { + self.shift -= produced_last_delete.len() as i64; + } + + self.last_delete = delete; + } + + /// Remove the last delete operation (if there was one) in case it is behind the + /// threshold operation. 
+ pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation) { + match self.last_delete.as_ref() { + Some(last_delete) + if threshold_operation.start_index() as i64 + self.shift + > last_delete.end_index() as i64 => + { + self.shift -= last_delete.len() as i64; + self.last_delete = None; + } + _ => {} + } + } +} diff --git a/backend/reconcile/src/operation_transformation/mod.rs b/backend/reconcile/src/operation_transformation/mod.rs new file mode 100644 index 0000000..5911a4b --- /dev/null +++ b/backend/reconcile/src/operation_transformation/mod.rs @@ -0,0 +1,169 @@ +mod edited_text; +mod merge_context; +mod operation; + +pub use edited_text::EditedText; +pub use operation::Operation; + +use crate::errors::SyncLibError; + +pub fn reconcile(original: &str, left: &str, right: &str) -> Result { + let left_operations = EditedText::from_strings(original, left); + let right_operations = EditedText::from_strings(original, right); + + let merged_operations = left_operations.merge(right_operations); + merged_operations.apply() +} + +#[cfg(test)] +mod test { + use std::{env, fs, ops::Range, path::Path}; + + use pretty_assertions::assert_eq; + use ropey::Rope; + use test_case::test_matrix; + + use super::*; + + #[test] + fn test_merges() { + // Both replaced one token but different + test_merge_both_ways( + "original_1 original_2 original_3", + "original_1 edit_1 original_3", + "original_1 original_2 edit_2", + "original_1 edit_1 edit_2", + ); + + // Both replaced the same one token + test_merge_both_ways( + "original_1 original_2 original_3", + "original_1 edit_1 original_3", + "original_1 edit_1 original_3", + "original_1 edit_1 edit_1 original_3", + ); + + // One deleted a large range, the other deleted subranges and inserted as well + test_merge_both_ways( + "original_1 original_2 original_3 original_4 original_5", + "original_1 original_5", + "original_1 edit_1 original_3 edit_2 original_5", + "original_1 edit_1 edit_2 original_5", + ); + + // 
One deleted a large range, the other inserted and deleted a partially overlapping range + test_merge_both_ways( + "original_1 original_2 original_3 original_4 original_5", + "original_1 original_5", + "original_1 edit_1 original_3 edit_2", + "original_1 edit_1 edit_2", + ); + + // Merge a replace and an append + test_merge_both_ways("a b ", "c d ", "a b c d ", "c d c d "); + + test_merge_both_ways("a b c d e", "a e", "a c e", "a e"); + + test_merge_both_ways("a 0 1 2 b", "a b", "a E 1 F b", "a E F b"); + + test_merge_both_ways( + "a this one delete b", + "a b", + "a my one change b", + "a my change b", + ); + + test_merge_both_ways( + "this stays, this is one big delete, don't touch this", + "this stays, don't touch this", + "this stays, my one change, don't touch this", + "this stays, my change, don't touch this", + ); + + test_merge_both_ways("1 2 3 4 5 6", "1 6", "1 2 4 ", "1 "); + + test_merge_both_ways( + "hello world", + "hi, world", + "hello my friend!", + "hi, my friend!", + ); + + // test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !"); + + test_merge_both_ways( + "both delete the same word", + "both the same word", + "both the same word", + "both the same word", + ); + + test_merge_both_ways(" ", "it’s utf-8!", " ", "it’s utf-8!"); + + test_merge_both_ways( + "both delete the same word but one a bit more", + "both the same word", + "both same word", + "both same wordword", + ); + + test_merge_both_ways( + "long text with one big delete and many small", + "long small", + "long with big and small", + "long small", + ); + } + + #[test_matrix( [ + "pride_and_prejudice.txt", + "romeo_and_juliet.txt", + "room_with_a_view.txt", + "kun_lu.txt", + + ], [ + "pride_and_prejudice.txt", + "romeo_and_juliet.txt", + "room_with_a_view.txt", + "kun_lu.txt" + ], [ + "pride_and_prejudice.txt", + "romeo_and_juliet.txt", + "room_with_a_view.txt", + "kun_lu.txt" + ], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])] + fn 
test_merge_files_without_panic( + file_name_1: &str, + file_name_2: &str, + file_name_3: &str, + range_1: Range, + range_2: Range, + range_3: Range, + ) { + let files = vec![file_name_1, file_name_2, file_name_3]; + let permutations = vec![range_1, range_2, range_3]; + + let root = Path::new("test/resources/"); + + let contents = files + .iter() + .zip(permutations.iter()) + .map(|(file, range)| { + let path = root.join(file); + fs::read_to_string(&path) + .unwrap() + .chars() + .skip(range.start) + .take(range.end) + .collect::() + }) + .collect::>(); + + reconcile(&contents[0], &contents[1], &contents[2]).unwrap(); + } + + fn test_merge_both_ways(original: &str, edit_1: &str, edit_2: &str, expected: &str) { + assert_eq!(reconcile(original, edit_1, edit_2).unwrap(), expected); + assert_eq!(reconcile(original, edit_2, edit_1).unwrap(), expected); + } +} diff --git a/backend/reconcile/src/operations/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs similarity index 53% rename from backend/reconcile/src/operations/operation.rs rename to backend/reconcile/src/operation_transformation/operation.rs index a301cad..b2dd63c 100644 --- a/backend/reconcile/src/operations/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -6,9 +6,11 @@ use crate::errors::SyncLibError; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use super::merge_context::MergeContext; + /// Represents a change that can be applied to a text document. /// Operation is tied to a ropey::Rope and is mainly expected to be -/// created by OperationSequence. +/// created by EditedText. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Operation { @@ -68,6 +70,15 @@ impl Operation { } /// Tries to apply the operation to the given ropey::Rope text, returning the modified text. 
+ /// + /// # Errors + /// + /// Returns a SyncLibError::OperationApplicationError if the operation cannot be applied. + /// + /// # Panics + /// + /// When compiled in debug mode, panics if a delete operation is attempted on a range + /// of text that does not match the text to be deleted. pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> { match self { Operation::Insert { text, .. } => rope_text @@ -145,12 +156,9 @@ impl Operation { } /// Clones the operation while updating the index. - pub fn with_index(&self, index: usize) -> Self { + pub fn with_index(self, index: usize) -> Self { match self { - Operation::Insert { text, .. } => Operation::Insert { - index, - text: text.clone(), - }, + Operation::Insert { text, .. } => Operation::Insert { index, text }, Operation::Delete { deleted_character_count, @@ -159,26 +167,159 @@ impl Operation { .. } => Operation::Delete { index, - deleted_character_count: *deleted_character_count, + deleted_character_count, #[cfg(debug_assertions)] - deleted_text: deleted_text.clone(), + deleted_text, }, } } /// Clones the operation while shifting the index by the given offset. /// The offset can be negative but the resulting index must be non-negative. - pub fn with_shifted_index(&self, offset: i64) -> Result { + /// + /// # Panics + /// + /// In debug mode, panics if the resulting index is negative. + pub fn with_shifted_index(self, offset: i64) -> Self { let index = self.start_index() as i64 + offset; - let non_negative_index = index.try_into().map_err(|_| { - SyncLibError::NegativeOperationIndexError(format!( - "Index {} is negative but operations must have a non-negative index", - index - )) - })?; + debug_assert!(index >= 0, "Shifted index must be non-negative"); - Ok(self.with_index(non_negative_index)) + self.with_index(index as usize) + } + + /// Merges the operation with the given context, producing a new operation and updating the context. 
+ /// This implements a complex FSM that handles the merging of operations in a way that is consistent with the text. + /// The contexts are updated in-place. + pub fn merge_operations_with_context( + self, + affecting_context: &mut MergeContext, + produced_context: &mut MergeContext, + ) -> Option { + affecting_context.consume_delete_if_behind_operation(&self); + + let operation = self.with_shifted_index(affecting_context.shift); + + match (operation, affecting_context.last_delete.clone()) { + (operation @ Operation::Insert { .. }, None) => { + produced_context.shift += operation.len() as i64; + Some(operation) + } + + (operation @ Operation::Delete { .. }, None) => { + produced_context.replace_delete(Some(operation.clone())); + Some(operation) + } + + (operation @ Operation::Insert { .. }, Some(last_delete)) => { + produced_context.shift += operation.len() as i64; + + debug_assert!( + last_delete.range().contains(&operation.start_index()), + "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" + ); + + let difference = operation.start_index() as i64 - last_delete.start_index() as i64; + + let moved_operation = operation.with_index(last_delete.start_index()); + + affecting_context.last_delete = Operation::create_delete( + moved_operation.end_index() + 1, + (last_delete.len() as i64 - difference) as usize, + ); + affecting_context.shift -= difference; + + Some(moved_operation) + } + + (operation @ Operation::Delete { ..
}, Some(last_delete)) => { + debug_assert!( + last_delete.range().contains(&operation.start_index()), + "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" + ); + + let difference = operation.start_index() as i64 - last_delete.start_index() as i64; + + let updated_delete = Operation::create_delete( + last_delete.start_index(), + 0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize, + ); + + affecting_context.shift -= difference; + affecting_context.last_delete = Operation::create_delete( + last_delete.start_index(), + 0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize, + ); + + produced_context.replace_delete(updated_delete.clone()); + + updated_delete + } + } + } + + /// Merges the operation with another operation that is consecutive to this operation. + /// For inserts, the other operation must start where this operation ends; for deletes, both must start at the same index. + /// The two operations must be of the same type, otherwise panics. + pub fn extend(self, other: &Self) -> Self { + match (self, other) { + ( + Operation::Insert { index, text }, + Operation::Insert { + text: other_text, ..
+ }, + ) => { + let end_index = index + text.chars().count(); + debug_assert!( + end_index == other.start_index(), + "Cannot merge non-consequtive inserts with index {} and {}", + end_index, + other.start_index() + ); + + Operation::Insert { + index, + text: text + other_text, + } + } + ( + Operation::Delete { + index, + deleted_character_count, + + #[cfg(debug_assertions)] + deleted_text, + }, + Operation::Delete { + index: other_index, + deleted_character_count: other_deleted_character_count, + + #[cfg(debug_assertions)] + deleted_text: other_deleted_text, + }, + ) => { + debug_assert!( + index == *other_index, + "Cannot merge non-consequtive deletes", + ); + + Operation::Delete { + index, + deleted_character_count: deleted_character_count + + other_deleted_character_count, + + #[cfg(debug_assertions)] + deleted_text: deleted_text + .into_iter() + .flat_map(|t1| other_deleted_text.as_ref().map(|t2| t1 + t2).into_iter()) + .last(), + } + } + (this, other) => panic!( + "Cannot merge operations of different type: {:?} and {:?}", + &this, &other + ), + } } } @@ -195,7 +336,7 @@ impl Display for Operation { #[cfg(debug_assertions)] deleted_text, } => { - if cfg!(debug_assertions) { + if cfg!(debug_assertions) && deleted_text.is_some() { write!( f, "", @@ -220,6 +361,7 @@ mod tests { use pretty_assertions::assert_eq; #[test] + #[should_panic] fn test_shifting_error() { insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string()) .unwrap() diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap new file mode 100644 index 0000000..c01a447 --- /dev/null +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -0,0 +1,61 @@ +--- 
+source: reconcile/src/operation_transformation/edited_text.rs +expression: operations +snapshot_kind: text +--- +EditedText { + text: "hello world! How are you? Adam", + operations: [ + OrderedOperation { + order: 0, + operation: Insert { + index: 0, + text: "Hello, my friend! ", + }, + }, + OrderedOperation { + order: 0, + operation: Delete { + index: 18, + deleted_character_count: 13, + deleted_text: Some( + "hello world! ", + ), + }, + }, + OrderedOperation { + order: 21, + operation: Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + "you? ", + ), + }, + }, + OrderedOperation { + order: 26, + operation: Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + " Adam", + ), + }, + }, + OrderedOperation { + order: 31, + operation: Insert { + index: 26, + text: "you ", + }, + }, + OrderedOperation { + order: 31, + operation: Insert { + index: 30, + text: "doing? Albert", + }, + }, + ], +} diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap.new b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap.new new file mode 100644 index 0000000..7a2cf8e --- /dev/null +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap.new @@ -0,0 +1,45 @@ +--- +source: reconcile/src/operation_transformation/edited_text.rs +assertion_line: 242 +expression: operations +snapshot_kind: text +--- +EditedText { + text: "hello world! How are you? Adam", + operations: [ + OrderedOperation { + order: 0, + operation: Insert { + index: 0, + text: "Hello, my friend! ", + }, + }, + OrderedOperation { + order: 0, + operation: Delete { + index: 18, + deleted_character_count: 13, + deleted_text: Some( + "hello world! 
", + ), + }, + }, + OrderedOperation { + order: 21, + operation: Delete { + index: 26, + deleted_character_count: 10, + deleted_text: Some( + "you? Adam", + ), + }, + }, + OrderedOperation { + order: 31, + operation: Insert { + index: 26, + text: "you doing? Albert", + }, + }, + ], +} diff --git a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__edited_text__tests__calculate_operations.snap similarity index 54% rename from backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap rename to backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__edited_text__tests__calculate_operations.snap index 810d341..02956ef 100644 --- a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__edited_text__tests__calculate_operations.snap @@ -1,60 +1,61 @@ --- -source: reconcile/src/operations/operation_sequence.rs -expression: result +source: reconcile/src/operations/edited_text.rs +expression: operations snapshot_kind: text --- -OperationSequence { +EditedText { + text: "hello world! How are you? Adam", operations: [ - ( - 0, - Insert { + OrderedOperation { + order: 0, + operation: Insert { index: 0, text: "Hello, my friend! ", }, - ), - ( - 0, - Delete { + }, + OrderedOperation { + order: 0, + operation: Delete { index: 18, deleted_character_count: 13, deleted_text: Some( "hello world! ", ), }, - ), - ( - 21, - Delete { + }, + OrderedOperation { + order: 21, + operation: Delete { index: 26, deleted_character_count: 5, deleted_text: Some( "you? 
", ), }, - ), - ( - 26, - Delete { + }, + OrderedOperation { + order: 26, + operation: Delete { index: 26, deleted_character_count: 5, deleted_text: Some( " Adam", ), }, - ), - ( - 31, - Insert { + }, + OrderedOperation { + order: 31, + operation: Insert { index: 26, text: "you ", }, - ), - ( - 31, - Insert { + }, + OrderedOperation { + order: 31, + operation: Insert { index: 30, text: "doing? Albert", }, - ), + }, ], } diff --git a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap similarity index 61% rename from backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap rename to backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap index 3b5eef9..1ba5156 100644 --- a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap @@ -3,58 +3,58 @@ source: reconcile/src/operations/operation_sequence.rs expression: operations snapshot_kind: text --- -OperationSequence { +EditedText { operations: [ - ( - 0, - Insert { + OrderedOperation { + order: 0, + operation: Insert { index: 0, text: "Hello, my friend! ", }, - ), - ( - 0, - Delete { + }, + OrderedOperation { + order: 0, + operation: Delete { index: 18, deleted_character_count: 13, deleted_text: Some( "hello world! ", ), }, - ), - ( - 21, - Delete { + }, + OrderedOperation { + order: 21, + operation: Delete { index: 26, deleted_character_count: 5, deleted_text: Some( "you? 
", ), }, - ), - ( - 26, - Delete { + }, + OrderedOperation { + order: 26, + operation: Delete { index: 26, deleted_character_count: 5, deleted_text: Some( " Adam", ), }, - ), - ( - 31, - Insert { + }, + OrderedOperation { + order: 31, + operation: Insert { index: 26, text: "you ", }, - ), - ( - 31, - Insert { + }, + OrderedOperation { + order: 31, + operation: Insert { index: 30, text: "doing? Albert", }, - ), + }, ], } diff --git a/backend/reconcile/src/operations/mod.rs b/backend/reconcile/src/operations/mod.rs deleted file mode 100644 index c70ea09..0000000 --- a/backend/reconcile/src/operations/mod.rs +++ /dev/null @@ -1,25 +0,0 @@ -mod operation; -mod operation_sequence; - -pub use operation::Operation; -pub use operation_sequence::OperationSequence; - -#[cfg(test)] -mod test { - - #[test] - fn test_merge() { - // let mut original = Rope::from_str("hello world!"); - // let edit_1 = "hi, world"; - // let edit_2 = "hello, my friend!"; - - // let mut operations_1 = calculate_operations(&original.to_string(), edit_1, 1.0).unwrap(); - // let mut operations_2 = calculate_operations(&original.to_string(), edit_2, 1.0).unwrap(); - - // let result = - // merge_and_apply_operations(&mut original, &mut operations_1, &mut operations_2) - // .unwrap(); - - // assert_eq!(result, "hey, my friend!"); - } -} diff --git a/backend/reconcile/src/operations/operation_sequence.rs b/backend/reconcile/src/operations/operation_sequence.rs deleted file mode 100644 index 41d0469..0000000 --- a/backend/reconcile/src/operations/operation_sequence.rs +++ /dev/null @@ -1,540 +0,0 @@ -use std::{cmp::Ordering, result, vec}; - -use super::Operation; -use crate::diffs::myers::diff; -use crate::diffs::raw_operation::RawOperation; -use crate::errors::SyncLibError; -use crate::tokenizer::token::Token; -use itertools::Itertools; -use ropey::Rope; - -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Default)] -struct MergeContext { - last_delete: Option, - 
shift: i64, -} - -/// A sequence of operations that can be applied to a text document. -/// OperationSequence supports merging two sequences of operations using the -/// principle of Operational Transformation. -/// -/// It's mainly created through the from_strings method, then merged with another -/// OperationSequence derived from the same original text and then applied to the original text -/// to get the reconciled text of concurrent edits. -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] -pub struct OperationSequence { - operations: Vec<(usize, Operation)>, -} - -impl OperationSequence { - /// Creates a new OperationSequence with the given operations. - /// The operations should be in the order they should be applied. - /// The operations must not overlap. - pub fn new(operations: Vec<(usize, Operation)>) -> Self { - operations.iter().zip(operations.iter().skip(1)).for_each( - |((i_prev, previous), (i_next, next))| { - debug_assert!( - i_prev == i_next - || i_prev + previous.len() <= *i_next - || !(matches!(previous, Operation::Delete { .. }) - && matches!(next, Operation::Insert { .. })), - "{} and {} overlap with old index {i_prev} and {i_next}", - previous, - next - ); - debug_assert!( - previous.start_index() <= next.start_index(), - "{} must not come before {} yet it does", - previous, - next - ); - }, - ); - - Self { - operations, // operations: Self::merge_subsequent_operations(operations), - } - } - - /// Creates an OperationSequence from the given original (old) and updated (new) strings. - /// The returned OperationSequence represents the changes from the original to the updated text. - /// When the return value is applied to the original text, it will result in the updated text. 
- pub fn from_strings(original: &str, updated: &str) -> Self { - let original_tokens = Token::tokenize(original); - let updated_tokens = Token::tokenize(updated); - - let diff: Vec = diff(&original_tokens, &updated_tokens); - - Self::new(Self::cook_operations(diff)) - } - - fn cook_operations(raw_operations: Vec) -> Vec<(usize, Operation)> { - let mut new_index = 0; - let mut old_index = 0; - - raw_operations - .into_iter() - .flat_map(|raw_operation| { - let length = raw_operation.original_text_length(); - - let operation = match raw_operation { - RawOperation::Equal(..) => { - new_index += length; - old_index += length; - - None - } - RawOperation::Insert(..) => { - let op = - Operation::create_insert(new_index, raw_operation.get_original_text()) - .map(|op| (old_index, op)); - - new_index += length; - - op - } - RawOperation::Delete(..) => { - let op = Operation::create_delete_with_text( - new_index, - raw_operation.get_original_text(), - ) - .map(|op| (old_index, op)); - - old_index += length; - - op - } - }; - - operation.into_iter() - }) - .sorted_by_key(|(order, _)| *order) - .collect() - } - - pub fn merge(&self, other: &Self) -> Result { - let mut merged_operations: Vec = - Vec::with_capacity(self.operations.len() + other.operations.len()); - - let mut left_merge_context = MergeContext::default(); - let mut right_merge_context = MergeContext::default(); - - let mut left_index: usize = 0; - let mut right_index: usize = 0; - - loop { - let left_op = self.operations.get(left_index); - let right_op = other.operations.get(right_index); - - let order = left_op - .map(|(order, _)| order) - .cmp(&right_op.map(|(order, _)| order)); - - println!("left_op: {:#?} <> right_op: {:#?}", left_op, right_op); - - let left_op = left_op.map(|(_, op)| op); - let right_op = right_op.map(|(_, op)| op); - - // let order = if order == Ordering::Equal { - // match (left_op.as_ref(), right_op.as_ref()) { - // (Some(Operation::Insert { .. }), Some(Operation::Delete { .. 
})) => { - // Ordering::Greater - // } - // (Some(Operation::Delete { .. }), Some(Operation::Insert { .. })) => { - // Ordering::Less - // } - // _ => Ordering::Equal, - // } - // } else { - // order - // }; - - // debug_assert!( - // right_merge_context.last_delete.is_none() - // || left_merge_context.last_delete.is_none(), - // "Both contexts have a last delete" - // ); - - match (left_op, right_op, order) { - (Some(left_op), None, _) - | (Some(left_op), Some(_), std::cmp::Ordering::Less | std::cmp::Ordering::Equal) => { - Self::pick_up_dangling_delete_from_affecting_context( - left_op.start_index(), - &mut right_merge_context, - ); - - if let Some(op) = Self::merge_operations_with_context( - left_op.with_shifted_index(right_merge_context.shift)?, - &mut right_merge_context, - &mut left_merge_context, - )? { - // println!("merged {:#?}", &op); - if let Some(last) = merged_operations.last() { - debug_assert!(op.start_index() >= last.start_index()); - } - merged_operations.push(op); - } - - left_index += 1; - } - (None, Some(right_op), _) - | (Some(_), Some(right_op), std::cmp::Ordering::Greater) => { - Self::pick_up_dangling_delete_from_affecting_context( - right_op.start_index(), - &mut left_merge_context, - ); - - if let Some(op) = Self::merge_operations_with_context( - right_op.with_shifted_index(left_merge_context.shift)?, - &mut left_merge_context, - &mut right_merge_context, - )? 
{ - // println!("merged {:#?}", &op); - if let Some(last) = merged_operations.last() { - debug_assert!(op.start_index() >= last.start_index()); - } - merged_operations.push(op); - } - - right_index += 1; - } - (None, None, _) => { - break; - } - }; - - println!( - "{:#?} <> {:#?}\n\n\n", - left_merge_context, right_merge_context - ); - } - - println!("merged_operations: {:#?}", merged_operations.to_vec()); - - Ok(Self::new( - merged_operations.into_iter().map(|op| (0, op)).collect(), - )) - } - - pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> { - for (_, operation) in &self.operations { - operation.apply(rope_text)?; - } - - Ok(rope_text) - } - - fn merge_operations_with_context( - aligned_operation: Operation, - affecting_context: &mut MergeContext, - produced_context: &mut MergeContext, - ) -> Result, SyncLibError> { - Ok( - match (aligned_operation, affecting_context.last_delete.clone()) { - (operation @ Operation::Insert { .. }, None) => { - produced_context.shift += operation.len() as i64; - Some(operation) - } - - (operation @ Operation::Delete { .. }, None) => { - Self::replace_delete_in_produced_context( - produced_context, - Some(operation.clone()), - ); - Some(operation) - } - - (operation @ Operation::Insert { .. }, Some(last_delete)) => { - produced_context.shift += operation.len() as i64; - - debug_assert!( - last_delete.range().contains(&operation.start_index()), - "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" - ); - - let difference = - operation.start_index() as i64 - last_delete.start_index() as i64; - - let moved_operation = operation.with_index(last_delete.start_index()); - - affecting_context.last_delete = Operation::create_delete( - moved_operation.end_index() + 1, - (last_delete.len() as i64 - difference) as usize, - ); - affecting_context.shift -= difference; - - Some(moved_operation) - } - - (operation @ Operation::Delete { .. 
}, Some(last_delete)) => { - debug_assert!( - last_delete.range().contains(&operation.start_index()), - "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" - ); - - let difference = - operation.start_index() as i64 - last_delete.start_index() as i64; - - let updated_delete = Operation::create_delete( - last_delete.start_index(), - 0.max(operation.end_index() as i64 - last_delete.end_index() as i64) - as usize, - ); - - affecting_context.shift -= difference; - affecting_context.last_delete = Operation::create_delete( - last_delete.start_index(), - 0.max(last_delete.end_index() as i64 - operation.end_index() as i64) - as usize, - ); - - Self::replace_delete_in_produced_context( - produced_context, - updated_delete.clone(), - ); - - updated_delete - } - }, - ) - } - - fn replace_delete_in_produced_context( - produced_context: &mut MergeContext, - delete: Option, - ) { - if let Some(produced_last_delete) = produced_context.last_delete.take() { - produced_context.shift -= produced_last_delete.len() as i64; - } - - produced_context.last_delete = delete; - } - - fn pick_up_dangling_delete_from_affecting_context( - start_index: usize, - affecting_context: &mut MergeContext, - ) { - match affecting_context.last_delete.as_ref() { - Some(last_delete) - if start_index as i64 + affecting_context.shift - > last_delete.end_index() as i64 => - { - affecting_context.shift -= last_delete.len() as i64; - affecting_context.last_delete = None; - } - _ => {} - } - } -} - -#[cfg(test)] -mod tests { - use std::{fs, path::Path}; - - use itertools::Itertools; - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn test_calculate_operations() { - let left = "hello world! How are you? Adam"; - let right = "Hello, my friend! How are you doing? 
Albert"; - - let operations = OperationSequence::from_strings(left, right); - - insta::assert_debug_snapshot!(operations); - - let mut left = Rope::from_str(left); - let new_right = operations.apply(&mut left).unwrap(); - - assert_eq!(new_right.to_string(), right); - } - - #[test] - fn test_calculate_operations_with_no_diff() { - let left = "hello world!"; - let right = "hello world!"; - - let operations = OperationSequence::from_strings(left, right); - - assert_eq!(operations.operations.len(), 0); - - let mut left = Rope::from_str(left); - let new_right = operations.apply(&mut left).unwrap(); - - assert_eq!(new_right.to_string(), right); - } - - #[test] - fn test_merges() { - // Both replaced one token but different - test_merge_both_ways( - "original_1 original_2 original_3", - "original_1 edit_1 original_3", - "original_1 original_2 edit_2", - "original_1 edit_1 edit_2", - ); - - // Both replaced the same one token - test_merge_both_ways( - "original_1 original_2 original_3", - "original_1 edit_1 original_3", - "original_1 edit_1 original_3", - "original_1 edit_1 edit_1 original_3", - ); - - // One deleted a large range, the other deleted subranges and inserted as well - test_merge_both_ways( - "original_1 original_2 original_3 original_4 original_5", - "original_1 original_5", - "original_1 edit_1 original_3 edit_2 original_5", - "original_1 edit_1 edit_2 original_5", - ); - - // One deleted a large range, the other inserted and deleted a partially overlapping range - test_merge_both_ways( - "original_1 original_2 original_3 original_4 original_5", - "original_1 original_5", - "original_1 edit_1 original_3 edit_2", - "original_1 edit_1 edit_2", - ); - - // Merge a replace and an append - test_merge_both_ways("a b ", "c d ", "a b c d ", "c d c d "); - - test_merge_both_ways("a b c d e", "a e", "a c e", "a e"); - - test_merge_both_ways("a 0 1 2 b", "a b", "a E 1 F b", "a E F b"); - - test_merge_both_ways( - "a this one delete b", - "a b", - "a my one change b", - 
"a my change b", - ); - - test_merge_both_ways( - "this stays, this is one big delete, don't touch this", - "this stays, don't touch this", - "this stays, my one change, don't touch this", - "this stays, my change, don't touch this", - ); - - test_merge_both_ways("1 2 3 4 5 6", "1 6", "1 2 4 ", "1 "); - - test_merge_both_ways( - "hello world", - "hi, world", - "hello my friend!", - "hi, my friend!", - ); - - // test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !"); - - test_merge_both_ways( - "both delete the same word", - "both the same word", - "both the same word", - "both the same word", - ); - - test_merge_both_ways(" ", "it’s utf-8!", " ", "it’s utf-8!"); - - test_merge_both_ways( - "both delete the same word but one a bit more", - "both the same word", - "both same word", - "both same wordword", - ); - - test_merge_both_ways( - "long text with one big delete and many small", - "long small", - "long with big and small", - "long small", - ); - } - - #[test] - - fn test_merge_files_without_panic() { - let files = vec![ - "pride_and_prejudice.txt", - "romeo_and_juliet.txt", - "room_with_a_view.txt", - ]; - - let root = Path::new("test/resources/"); - let contents = files - .into_iter() - .map(|name| fs::read_to_string(root.join(name)).unwrap()) - .map(|text| text[..15000].to_string()) - .collect::>(); - - contents - .iter() - .permutations(3) - .unique() - .for_each(|permutations| { - test_merge(permutations[0], permutations[1], permutations[2]); - }); - } - - fn test_merge_both_ways(original: &str, edit_1: &str, edit_2: &str, expected: &str) { - assert_eq!(test_merge(original, edit_1, edit_2), expected); - assert_eq!(test_merge(original, edit_2, edit_1), expected); - } - - fn test_merge(original: &str, edit_1: &str, edit_2: &str) -> String { - // println!( - // "original: '{:#}'", - // original[..100.min(original.len())].to_string() - // ); - // println!( - // "edit_1: '{:#}'", - // edit_1[..100.min(edit_1.len())].to_string() - // ); - 
// println!( - // "edit_2: '{:#}'", - // edit_2[..100.min(edit_2.len())].to_string() - // ); - - let mut original = Rope::from_str(original); - - let operations_1 = OperationSequence::from_strings(&original.to_string(), edit_1); - // println!( - // "operations_1: {:#?}", - // operations_1.operations[..20.min(operations_1.operations.len())].to_vec() - // ); - let operations_2 = OperationSequence::from_strings(&original.to_string(), edit_2); - // println!( - // "operations_2: {:#?}", - // operations_2.operations[..20.min(operations_2.operations.len())].to_vec() - // ); - - assert_eq!( - operations_1 - .apply(&mut original.clone()) - .unwrap() - .to_string(), - edit_1 - ); - assert_eq!( - operations_2 - .apply(&mut original.clone()) - .unwrap() - .to_string(), - edit_2 - ); - - let merged = operations_1.merge(&operations_2).unwrap(); - - let result = merged.apply(&mut original).unwrap(); - result.to_string() - } -} diff --git a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation__tests__shifting_error.snap b/backend/reconcile/src/operations/snapshots/reconcile__operations__operation__tests__shifting_error.snap deleted file mode 100644 index 2d65dd1..0000000 --- a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation__tests__shifting_error.snap +++ /dev/null @@ -1,10 +0,0 @@ ---- -source: reconcile/src/operations/operation.rs -expression: "Operation::create_insert(1, \"hi\".to_string()).unwrap().with_shifted_index(-2)" -snapshot_kind: text ---- -Err( - NegativeOperationIndexError( - "Index -1 is negative but operations must have a non-negative index", - ), -) diff --git a/backend/reconcile/src/utils/ordered_operation.rs b/backend/reconcile/src/utils/ordered_operation.rs index 065853f..6cac730 100644 --- a/backend/reconcile/src/utils/ordered_operation.rs +++ b/backend/reconcile/src/utils/ordered_operation.rs @@ -1,7 +1,7 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::operations::Operation; +use 
crate::operation_transformation::Operation; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash)]