diff --git a/backend/reconcile/src/diffs/raw_operation.rs b/backend/reconcile/src/diffs/raw_operation.rs index 0df48f5..f95a034 100644 --- a/backend/reconcile/src/diffs/raw_operation.rs +++ b/backend/reconcile/src/diffs/raw_operation.rs @@ -28,10 +28,26 @@ where pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() } - /// Extends the operation with another operation if returning the new - /// operation. Only operations of the same type can be used to extend. - /// If the operations are of different types, returns None. + pub fn is_left_joinable(&self) -> bool { + let first_token = self.tokens().first(); + first_token.map_or(true, |t| t.get_is_left_joinable()) + } + + pub fn is_right_joinable(&self) -> bool { + let last_token = self.tokens().last(); + last_token.map_or(true, |t| t.get_is_right_joinable()) + } + + /// Extends this operation with the tokens of another operation of the same + /// type and returns the result in `Some`. Mixing operation types is a bug: + /// it panics rather than returning `None`. pub fn extend(self, other: RawOperation) -> Option> { + debug_assert!( + std::mem::discriminant(&self) == std::mem::discriminant(&other), + "Cannot extend operations of different types. This should have been handled before \ + calling this function." 
+ ); + match (self, other) { (RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => Some( RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect()), @@ -42,7 +58,7 @@ where (RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => Some( RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect()), ), - _ => None, + _ => unreachable!("Only operations of the same type can be extended"), } } } diff --git a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__complex_diff.snap b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__complex_diff.snap index 8c89ed3..57ee086 100644 --- a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__complex_diff.snap +++ b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__complex_diff.snap @@ -9,6 +9,8 @@ snapshot_kind: text Token { normalised: "a", original: "a", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -17,6 +19,8 @@ snapshot_kind: text Token { normalised: "x", original: "x", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -25,6 +29,8 @@ snapshot_kind: text Token { normalised: "b", original: "b", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -33,6 +39,8 @@ snapshot_kind: text Token { normalised: "c", original: "c", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -41,6 +49,8 @@ snapshot_kind: text Token { normalised: "y", original: "y", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -49,6 +59,8 @@ snapshot_kind: text Token { normalised: "d", original: "d", + is_left_joinable: true, + is_right_joinable: true, }, ], ), diff --git a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__delete_only.snap b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__delete_only.snap index f07eb3d..a4598d0 100644 --- a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__delete_only.snap +++ 
b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__delete_only.snap @@ -9,10 +9,14 @@ snapshot_kind: text Token { normalised: "a", original: "a", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "b", original: "b", + is_left_joinable: true, + is_right_joinable: true, }, ], ), diff --git a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__identical_content.snap b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__identical_content.snap index a99e276..2fc3317 100644 --- a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__identical_content.snap +++ b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__identical_content.snap @@ -9,14 +9,20 @@ snapshot_kind: text Token { normalised: "a", original: "a", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "b", original: "b", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "c", original: "c", + is_left_joinable: true, + is_right_joinable: true, }, ], ), diff --git a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__insert_only.snap b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__insert_only.snap index b32c8ce..e07d844 100644 --- a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__insert_only.snap +++ b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__insert_only.snap @@ -9,10 +9,14 @@ snapshot_kind: text Token { normalised: "a", original: "a", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "b", original: "b", + is_left_joinable: true, + is_right_joinable: true, }, ], ), diff --git a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__prefix_and_suffix.snap b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__prefix_and_suffix.snap index 03c8fee..6b86600 100644 --- 
a/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__prefix_and_suffix.snap +++ b/backend/reconcile/src/diffs/snapshots/reconcile__diffs__myers__tests__prefix_and_suffix.snap @@ -9,6 +9,8 @@ snapshot_kind: text Token { normalised: "a", original: "a", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -17,10 +19,14 @@ snapshot_kind: text Token { normalised: "b", original: "b", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "c", original: "c", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -29,6 +35,8 @@ snapshot_kind: text Token { normalised: "x", original: "x", + is_left_joinable: true, + is_right_joinable: true, }, ], ), @@ -37,6 +45,8 @@ snapshot_kind: text Token { normalised: "d", original: "d", + is_left_joinable: true, + is_right_joinable: true, }, ], ), diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index 8fc2ed9..fdaa87f 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -3,15 +3,12 @@ use core::iter; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use super::{CursorPosition, Operation, TextWithCursors}; +use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation}; use crate::{ diffs::{myers::diff, raw_operation::RawOperation}, operation_transformation::merge_context::MergeContext, tokenizer::{Tokenizer, word_tokenizer::word_tokenizer}, - utils::{ - merge_iters::MergeSorted as _, ordered_operation::OrderedOperation, side::Side, - string_builder::StringBuilder, - }, + utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder}, }; /// A sequence of operations that can be applied to a text document. 
@@ -66,11 +63,93 @@ where Self::new( original, - Self::cook_operations(Self::elongate_operations(diff)).collect(), + Self::cook_operations(Self::elongate_operations(Self::break_up_raw_operations( + diff, + ))) + .collect(), updated.cursors, ) } + fn break_up_raw_operations(raw_operations: I) -> impl Iterator> + where + I: IntoIterator>, + { + raw_operations.into_iter().flat_map(|raw_operation| { + let mut result: Vec> = Vec::new(); + match raw_operation { + RawOperation::Insert(tokens) => { + for token in tokens { + result.push(RawOperation::Insert(vec![token])); + } + } + RawOperation::Delete(tokens) => { + for token in tokens { + result.push(RawOperation::Delete(vec![token])); + } + } + RawOperation::Equal(tokens) => { + for token in tokens { + result.push(RawOperation::Equal(vec![token])); + } + } + } + result.into_iter() + }) + } + + fn elongate_operations(raw_operations: I) -> Vec> + where + I: IntoIterator>, + { + let mut maybe_previous_insert: Option> = None; + let mut maybe_previous_delete: Option> = None; + + let mut result: Vec> = raw_operations + .into_iter() + .flat_map(|next| match next { + RawOperation::Insert(..) => match maybe_previous_insert.take() { + Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { + maybe_previous_insert = prev.extend(next); + Box::new(iter::empty()) as Box>> + } + prev => { + maybe_previous_insert = Some(next); + Box::new(prev.into_iter()) + } + }, + RawOperation::Delete(..) => match maybe_previous_delete.take() { + Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { + maybe_previous_delete = prev.extend(next); + Box::new(iter::empty()) as Box>> + } + prev => { + maybe_previous_delete = Some(next); + Box::new(prev.into_iter()) + } + }, + RawOperation::Equal(..) 
=> Box::new( + maybe_previous_insert + .take() + .into_iter() + .chain(maybe_previous_delete.take()) + .chain(iter::once(next)), + ) + as Box>>, + }) + .collect(); + + if let Some(prev) = maybe_previous_insert { + result.push(prev); + } + + if let Some(prev) = maybe_previous_delete { + result.push(prev); + } + + result + } + // Turn raw operations into ordered operations while keeping track of old & new // indexes. fn cook_operations(raw_operations: I) -> impl Iterator> @@ -119,56 +198,6 @@ where }) } - fn elongate_operations(raw_operations: I) -> Vec> - where - I: IntoIterator>, - { - let mut maybe_previous_insert: Option> = None; - let mut maybe_previous_delete: Option> = None; - - let mut result: Vec> = raw_operations - .into_iter() - .flat_map(|next| match next { - RawOperation::Insert(..) => { - if let Some(prev) = maybe_previous_insert.take() { - maybe_previous_insert = prev.extend(next); - } else { - maybe_previous_insert = Some(next); - } - - Box::new(iter::empty()) as Box>> - } - RawOperation::Delete(..) => { - if let Some(prev) = maybe_previous_delete.take() { - maybe_previous_delete = prev.extend(next); - } else { - maybe_previous_delete = Some(next); - } - - Box::new(iter::empty()) as Box>> - } - RawOperation::Equal(..) => Box::new( - maybe_previous_insert - .take() - .into_iter() - .chain(maybe_previous_delete.take()) - .chain(iter::once(next)), - ) - as Box>>, - }) - .collect(); - - if let Some(prev) = maybe_previous_insert { - result.push(prev); - } - - if let Some(prev) = maybe_previous_delete { - result.push(prev); - } - - result - } - /// Create a new `EditedText` with the given operations. /// The operations must be in the order in which they are meant to be /// applied. The operations must not overlap. @@ -225,6 +254,7 @@ where // Operations on the left and right must come in the same order so that // inserts can be merged with other inserts and deletes with deletes. usize::from(matches!(operation.operation, Operation::Delete { .. 
})), + operation.operation.start_index(), // Make sure that the ordering is deterministic regardless which text // is left or right. match &operation.operation { diff --git a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-3.snap b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-3.snap index 58d749e..d1c94e1 100644 --- a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-3.snap +++ b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-3.snap @@ -5,11 +5,21 @@ snapshot_kind: text --- [ Token { - normalised: "what?", - original: " what?", + normalised: " what?", + original: " ", + is_left_joinable: true, + is_right_joinable: true, }, Token { - normalised: "", + normalised: "what?", + original: "what?", + is_left_joinable: true, + is_right_joinable: true, + }, + Token { + normalised: " ", original: " ", + is_left_joinable: true, + is_right_joinable: true, }, ] diff --git a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-4.snap b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-4.snap index 4c28a7f..6740dbc 100644 --- a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-4.snap +++ b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-4.snap @@ -4,20 +4,52 @@ expression: "word_tokenizer(\" hello, \\nwhere are you?\")" snapshot_kind: text --- [ + Token { + normalised: " hello,", + original: " ", + is_left_joinable: true, + is_right_joinable: true, + }, Token { normalised: "hello,", - original: " hello,", + original: "hello,", + is_left_joinable: true, + is_right_joinable: true, + }, + Token { + normalised: " \nwhere", + original: " \n", + is_left_joinable: true, 
+ is_right_joinable: true, }, Token { normalised: "where", - original: " \nwhere", + original: "where", + is_left_joinable: true, + is_right_joinable: true, + }, + Token { + normalised: " are", + original: " ", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "are", - original: " are", + original: "are", + is_left_joinable: true, + is_right_joinable: true, + }, + Token { + normalised: " you?", + original: " ", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "you?", - original: " you?", + original: "you?", + is_left_joinable: true, + is_right_joinable: true, }, ] diff --git a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-5.snap b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-5.snap new file mode 100644 index 0000000..832147e --- /dev/null +++ b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots-5.snap @@ -0,0 +1,39 @@ +--- +source: reconcile/src/tokenizer/word_tokenizer.rs +expression: "word_tokenizer(\" hello, \\nwhere are you?\")" +snapshot_kind: text +--- +[ + Token { + normalised: " ", + original: " ", + }, + Token { + normalised: "hello,", + original: "hello,", + }, + Token { + normalised: " \n", + original: " \n", + }, + Token { + normalised: "where", + original: "where", + }, + Token { + normalised: " ", + original: " ", + }, + Token { + normalised: "are", + original: "are", + }, + Token { + normalised: " ", + original: " ", + }, + Token { + normalised: "you?", + original: "you?", + }, +] diff --git a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots.snap b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots.snap index 206c7fe..95c8db5 100644 --- 
a/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots.snap +++ b/backend/reconcile/src/tokenizer/snapshots/reconcile__tokenizer__word_tokenizer__tests__with_snapshots.snap @@ -7,9 +7,19 @@ snapshot_kind: text Token { normalised: "Hi", original: "Hi", + is_left_joinable: true, + is_right_joinable: true, + }, + Token { + normalised: " there!", + original: " ", + is_left_joinable: true, + is_right_joinable: true, }, Token { normalised: "there!", - original: " there!", + original: "there!", + is_left_joinable: true, + is_right_joinable: true, }, ] diff --git a/backend/reconcile/src/tokenizer/token.rs b/backend/reconcile/src/tokenizer/token.rs index ab521a7..86cbb92 100644 --- a/backend/reconcile/src/tokenizer/token.rs +++ b/backend/reconcile/src/tokenizer/token.rs @@ -3,29 +3,45 @@ use serde::{Deserialize, Serialize}; /// A token is a string that has been normalised in some way. /// The normalised form is used for comparison, while the original form is used -/// for applying Operations. +/// for applying `Operation`-s. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct Token where T: PartialEq + Clone + std::fmt::Debug, { - normalised: T, + /// The normalised form of the token, used for deriving the diff. + pub normalised: T, + + /// The original string that should be inserted or deleted in the document. original: String, + + /// Whether the token is joinable with the previous token. + is_left_joinable: bool, + + /// Whether the token is joinable with the next token. 
+ is_right_joinable: bool, } impl From<&str> for Token { - fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) } + fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) } } impl Token where T: PartialEq + Clone + std::fmt::Debug, { - pub fn new(normalised: T, original: String) -> Self { + pub fn new( + normalised: T, + original: String, + is_left_joinable: bool, + is_right_joinable: bool, + ) -> Self { Token { normalised, original, + is_left_joinable, + is_right_joinable, } } @@ -34,6 +50,10 @@ where pub fn normalised(&self) -> &T { &self.normalised } pub fn get_original_length(&self) -> usize { self.original.chars().count() } + + pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable } + + pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable } } impl PartialEq for Token diff --git a/backend/reconcile/tests/examples/multiline.yml b/backend/reconcile/tests/examples/multiline.yml index c751feb..00de7cd 100644 --- a/backend/reconcile/tests/examples/multiline.yml +++ b/backend/reconcile/tests/examples/multiline.yml @@ -7,14 +7,59 @@ left: | right: | Hello there! - Best, Andras expected: | Hello there! - How are you? - Best, Andras + + + How are you? + +--- + +parent: | + - my list + - 2nd item + - 3rd item + +left: | + - my list + - 2nd item + - nested list + - very nested list + - 3rd item + +right: | + - my list + - nested list + - 2nd item + - 3rd item + - another nested list + +expected: | + - my list + - nested list + - 2nd item + - nested list + - very nested list + - 3rd item + - another nested list + +--- + +parent: | + a + a +left: | + a + a +right: | + a + a +expected: | + a + a