diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index 3b59ac9..c16fc03 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -3,10 +3,14 @@ use crate::diffs::raw_operation::RawOperation; use crate::errors::SyncLibError; use crate::operation_transformation::merge_context::MergeContext; use crate::tokenizer::token::Token; +use crate::tokenizer::word_tokenizer::word_tokenizer; use crate::utils::ordered_operation::OrderedOperation; use crate::utils::side::Side; use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted}; use ropey::Rope; +use std::hash::Hash; +use std::iter; +use std::path::Iter; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -29,121 +33,129 @@ impl<'a> EditedText<'a> { /// Create an EditedText from the given original (old) and updated (new) strings. /// The returned EditedText represents the changes from the original to the updated text. /// When the return value is applied to the original text, it will result in the updated text. + /// The default word tokenizer is used to tokenize the text which splits the text on whitespaces. pub fn from_strings(original: &'a str, updated: &str) -> Self { - let original_tokens = Token::tokenize(original); - let updated_tokens = Token::tokenize(updated); + Self::from_strings_with_tokenizer(original, updated, &word_tokenizer) + } - let diff: Vec = diff(&original_tokens, &updated_tokens); + /// Create an EditedText from the given original (old) and updated (new) strings. + /// The returned EditedText represents the changes from the original to the updated text. + /// When the return value is applied to the original text, it will result in the updated text. + /// The tokenizer function is used to tokenize the text. + pub fn from_strings_with_tokenizer( + original: &'a str, + updated: &str, + tokenizer: &F, + ) -> Self + where + F: Fn(&str) -> Vec>, + T: PartialEq + Hash + Clone, + { + let original_tokens = (tokenizer)(original); + let updated_tokens = (tokenizer)(updated); + + let diff: Vec> = diff(&original_tokens, &updated_tokens); Self::new( original, - Self::elongate_operations(Self::cook_operations(diff)), + // Self::cook_operations(diff), + Self::cook_operations(Self::elongate_operations(diff.into_iter())).collect(), ) } // Turn raw operations into ordered operations while keeping track of old & new indexes. - fn cook_operations(raw_operations: Vec) -> Vec { + fn cook_operations(raw_operations: I) -> impl Iterator + where + I: IntoIterator>, + T: PartialEq + Hash + Clone, + { let mut new_index = 0; // this is the start index of the operation on the new text let mut order = 0; // this is the start index of the operation on the original text - raw_operations - .into_iter() - .flat_map(|raw_operation| { - let length = raw_operation.original_text_length(); + raw_operations.into_iter().flat_map(move |raw_operation| { + let length = raw_operation.original_text_length(); - let operation = match raw_operation { - RawOperation::Equal(..) => { - new_index += length; - order += length; + let operation = match raw_operation { + RawOperation::Equal(..) => { + new_index += length; + order += length; - None - } - RawOperation::Insert(..) => { - let op = - Operation::create_insert(new_index, raw_operation.get_original_text()) - .map(|operation| OrderedOperation { order, operation }); - - new_index += length; - - op - } - RawOperation::Delete(..) => { - let op = if cfg!(debug_assertions) { - Operation::create_delete_with_text( - new_index, - raw_operation.get_original_text(), - ) - } else { - Operation::create_delete(new_index, length) - } - .map(|operation| OrderedOperation { order, operation }); - - order += length; - - op - } - }; - - operation.into_iter() - }) - .collect() - } - - // TODO: shift ops befor compacting - fn elongate_operations(operations: Vec) -> Vec { - let mut maybe_previous: Option = None; - - let mut result: Vec = operations - .into_iter() - .flat_map(|next| { - if let Some(previous) = maybe_previous.take() { - match (previous, next) { - ( - previous @ OrderedOperation { - operation: Operation::Insert { .. }, - .. - }, - next @ OrderedOperation { - operation: Operation::Insert { .. }, - .. - }, - ) if previous.operation.end_index() + 1 == next.operation.start_index() => { - maybe_previous = Some(OrderedOperation { - order: previous.order, - operation: previous.operation.extend(&next.operation), - }); - None - } - ( - previous @ OrderedOperation { - operation: Operation::Delete { .. }, - .. - }, - next @ OrderedOperation { - operation: Operation::Delete { .. }, - .. - }, - ) if previous.operation.start_index() == next.operation.start_index() => { - maybe_previous = Some(OrderedOperation { - order: previous.order, - operation: previous.operation.extend(&next.operation), - }); - None - } - (previous, next) => { - maybe_previous = Some(next); - Some(previous) - } - } - } else { - maybe_previous = Some(next.clone()); None } - .into_iter() + RawOperation::Insert(..) => { + let op = Operation::create_insert(new_index, raw_operation.get_original_text()) + .map(|operation| OrderedOperation { order, operation }); + + new_index += length; + + op + } + RawOperation::Delete(..) => { + let op = if cfg!(debug_assertions) { + Operation::create_delete_with_text( + new_index, + raw_operation.get_original_text(), + ) + } else { + Operation::create_delete(new_index, length) + } + .map(|operation| OrderedOperation { order, operation }); + + order += length; + + op + } + }; + + operation.into_iter() + }) + } + + fn elongate_operations(raw_operations: I) -> Vec> + where + I: IntoIterator>, + T: PartialEq + Hash + Clone, + { + let mut maybe_previous_insert: Option> = None; + let mut maybe_previous_delete: Option> = None; + + let mut result: Vec> = raw_operations + .into_iter() + .flat_map(|next| match next { + RawOperation::Insert(..) => { + if let Some(prev) = maybe_previous_insert.take() { + maybe_previous_insert = prev.extend(next); + } else { + maybe_previous_insert = Some(next); + } + + Box::new(iter::empty()) as Box>> + } + RawOperation::Delete(..) => { + if let Some(prev) = maybe_previous_delete.take() { + maybe_previous_delete = prev.extend(next); + } else { + maybe_previous_delete = Some(next); + } + + Box::new(iter::empty()) as Box>> + } + RawOperation::Equal(..) => Box::new( + maybe_previous_insert + .take() + .into_iter() + .chain(maybe_previous_delete.take()) + .chain(iter::once(next)), + ) + as Box>>, }) .collect(); - if let Some(prev) = maybe_previous { + if let Some(prev) = maybe_previous_insert { + result.push(prev); + } + + if let Some(prev) = maybe_previous_delete { result.push(prev); } @@ -257,20 +269,20 @@ mod tests { assert_eq!(new_right.to_string(), text); } - // #[test] - // fn test_calculate_operations_with_insert() { - // let original = "hello world! ..."; - // let left = "Hello world! How are you?"; - // let right = "hello world! I'm Andras."; - // let expected = "Hello world! I'm Andras. How are you?"; + #[test] + fn test_calculate_operations_with_insert() { + let original = "hello world! ..."; + let left = "hello world! I'm Andras."; + let right = "Hello world! How are you?"; + let expected = "Hello world! I'm Andras.How are you?"; - // let operations_1 = EditedText::from_strings(original, left); - // println!("{:#?}", operations_1); - // let operations_2 = EditedText::from_strings(original, right); - // println!("{:#?}", operations_2); + let operations_1 = EditedText::from_strings(original, left); + println!("{:#?}", operations_1); + let operations_2 = EditedText::from_strings(original, right); + println!("{:#?}", operations_2); - // let operations = operations_1.merge(operations_2); + let operations = operations_1.merge(operations_2); - // assert_eq!(operations.apply().unwrap(), expected); - // } + assert_eq!(operations.apply().unwrap(), expected); + } } diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index f66de81..8874a45 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -251,70 +251,6 @@ impl Operation { } } } - - /// Merges the operation with another operation that is consequtive to this operation. - /// The other operation must start where this operation ends. - /// The two operations must be of the same type, otherwise panics. - pub fn extend(self, other: &Self) -> Self { - match (self, other) { - ( - Operation::Insert { index, text }, - Operation::Insert { - text: other_text, .. - }, - ) => { - let end_index = index + text.chars().count(); - debug_assert!( - end_index == other.start_index(), - "Cannot merge non-consequtive inserts with index {} and {}", - end_index, - other.start_index() - ); - - Operation::Insert { - index, - text: text + other_text, - } - } - ( - Operation::Delete { - index, - deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text, - }, - Operation::Delete { - index: other_index, - deleted_character_count: other_deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text: other_deleted_text, - }, - ) => { - debug_assert!( - index == *other_index, - "Cannot merge non-consequtive deletes", - ); - - Operation::Delete { - index, - deleted_character_count: deleted_character_count - + other_deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text: deleted_text - .into_iter() - .flat_map(|t1| other_deleted_text.as_ref().map(|t2| t1 + t2).into_iter()) - .last(), - } - } - (this, other) => panic!( - "Cannot merge operations of different type: {:?} and {:?}", - &this, &other - ), - } - } } impl Display for Operation { diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index f6089aa..065ee2e 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -25,20 +25,20 @@ EditedText { }, OrderedOperation { order: 21, - operation: Delete { + operation: Insert { index: 26, + text: "you doing? Albert", + }, + }, + OrderedOperation { + order: 21, + operation: Delete { + index: 43, deleted_character_count: 10, deleted_text: Some( "you? Adam", ), }, }, - OrderedOperation { - order: 31, - operation: Insert { - index: 26, - text: "you doing? Albert", - }, - }, ], }