From 3303a31cc4d5135edd9e896f35c1bd7b10d768e6 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Fri, 29 Nov 2024 22:20:42 +0000 Subject: [PATCH] Merge inserts with inserts --- .../operation_transformation/edited_text.rs | 12 +++- .../operation_transformation/merge_context.rs | 38 ++++++++--- .../src/operation_transformation/mod.rs | 4 +- .../src/operation_transformation/operation.rs | 55 ++++++++++++---- ...ted_text__tests__calculate_operations.snap | 52 ++------------- .../src/utils/find_common_overlap.rs | 66 +++++++++++++++++++ backend/reconcile/src/utils/mod.rs | 1 + 7 files changed, 155 insertions(+), 73 deletions(-) create mode 100644 backend/reconcile/src/utils/find_common_overlap.rs diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index da9ed33a..dcc11bc0 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -197,7 +197,14 @@ where .map(|op| (op, Side::Left)) .merge_sorted_by_key( other.operations.into_iter().map(|op| (op, Side::Right)), - |(operation, _)| operation.order, + |(operation, _)| { + ( + operation.order, + // Operations on left and right must come in the same order so that + // inserts can be merged with other inserts and deletes with deletes. + matches!(operation.operation, Operation::Delete { .. }) as usize, + ) + }, ) .flat_map(|(OrderedOperation { order, operation }, side)| { match side { @@ -272,7 +279,7 @@ mod tests { #[test] fn test_calculate_operations_with_insert() { let original = "hello world! ..."; - let left = "hello world! I'm Andras."; + let left = "Hello world! I'm Andras."; let right = "Hello world! How are you?"; let expected = "Hello world! I'm Andras.How are you?"; @@ -282,7 +289,6 @@ mod tests { println!("{:#?}", operations_2); let operations = operations_1.merge(operations_2); - assert_eq!(operations.apply().unwrap(), expected); } } diff --git a/backend/reconcile/src/operation_transformation/merge_context.rs b/backend/reconcile/src/operation_transformation/merge_context.rs index 6e77a033..c7a80210 100644 --- a/backend/reconcile/src/operation_transformation/merge_context.rs +++ b/backend/reconcile/src/operation_transformation/merge_context.rs @@ -1,6 +1,8 @@ +use std::fmt::Debug; + use crate::operation_transformation::Operation; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct MergeContext where T: PartialEq + Clone, @@ -21,6 +23,18 @@ where } } +impl Debug for MergeContext +where + T: PartialEq + Clone, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MergeContext") + .field("last_operation", &self.last_operation) + .field("shift", &self.shift) + .finish() + } +} + impl MergeContext where T: PartialEq + Clone, @@ -55,18 +69,24 @@ where threshold_operation: &Operation, ) { if let Some(last_operation) = self.last_operation.as_ref() { - if threshold_operation.start_index() as i64 + self.shift - > last_operation.end_index() as i64 + if let Operation::Delete { + deleted_character_count, + .. + } = last_operation { - if let Operation::Delete { - deleted_character_count, - .. - } = last_operation + if threshold_operation.start_index() as i64 + self.shift + > last_operation.end_index() as i64 { self.shift -= *deleted_character_count as i64; + self.last_operation = None; + } + } else if let Operation::Insert { .. } = last_operation { + if threshold_operation.start_index() as i64 + self.shift + - last_operation.len() as i64 + > last_operation.end_index() as i64 + { + self.last_operation = None; } - - self.last_operation = None; } } } diff --git a/backend/reconcile/src/operation_transformation/mod.rs b/backend/reconcile/src/operation_transformation/mod.rs index 3d9d239e..234abc82 100644 --- a/backend/reconcile/src/operation_transformation/mod.rs +++ b/backend/reconcile/src/operation_transformation/mod.rs @@ -56,7 +56,7 @@ mod test { "original_1 original_2 original_3", "original_1 edit_1 original_3", "original_1 edit_1 original_3", - "original_1 edit_1 edit_1 original_3", + "original_1 edit_1 original_3", ); // One deleted a large range, the other deleted subranges and inserted as well @@ -120,7 +120,7 @@ mod test { "both delete the same word but one a bit more", "both the same word", "both same word", - "both same wordword", + "both same word", ); test_merge_both_ways( diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index 8dc3725c..e669bc93 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -1,7 +1,8 @@ -use ropey::Rope; -use std::fmt::Display; - +use crate::utils::find_common_overlap::find_common_overlap; use crate::{errors::SyncLibError, Token}; +use ropey::Rope; +use std::fmt::Debug; +use std::fmt::Display; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -12,7 +13,7 @@ use super::merge_context::MergeContext; /// Operation is tied to a ropey::Rope and is mainly expected to be /// created by EditedText. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq)] +#[derive(Clone, PartialEq)] pub enum Operation where T: PartialEq + Clone, @@ -205,22 +206,43 @@ where produced_context: &mut MergeContext, ) -> Option> { affecting_context.consume_last_operation_if_it_is_too_behind(&self); - let operation = self.with_shifted_index(affecting_context.shift); - match (operation, affecting_context.last_operation().clone()) { + match (operation, affecting_context.last_operation()) { (operation @ Operation::Insert { .. }, None) => { produced_context.shift += operation.len() as i64; + produced_context.consume_and_replace_last_operation(Some(operation.clone())); Some(operation) } - (operation, Some(last_insert @ Operation::Insert { .. })) => { - produced_context.shift += operation.len() as i64; - Some(operation) + ( + Operation::Insert { text, index }, + Some(Operation::Insert { + text: previous_inserted_text, + .. + }), + ) => { + let offset_in_tokens = find_common_overlap(previous_inserted_text, &text); + let trimmed_length_in_tokens = previous_inserted_text.len() - offset_in_tokens; + let trimmed_length = previous_inserted_text + .iter() + .skip(offset_in_tokens) + .map(|token| token.get_original_length()) + .sum::(); + let trimmed_operation = + Operation::create_insert(index, text[trimmed_length_in_tokens..].to_vec()); + + affecting_context.shift -= trimmed_length as i64; + produced_context.shift += trimmed_operation + .as_ref() + .map(|op| op.len()) + .unwrap_or_default() as i64; + produced_context.consume_and_replace_last_operation(trimmed_operation.clone()); + + trimmed_operation } - // We can never delete inside an insert - (operation @ Operation::Delete { .. }, None) => { + (operation @ Operation::Delete { .. }, None | Some(Operation::Insert { .. })) => { produced_context.consume_and_replace_last_operation(Some(operation.clone())); Some(operation) } @@ -246,6 +268,8 @@ where )); affecting_context.shift -= difference; + produced_context.consume_and_replace_last_operation(Some(moved_operation.clone())); + Some(moved_operation) } @@ -321,6 +345,15 @@ where } } +impl Debug for Operation +where + T: PartialEq + Clone, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index 99312c65..e8a04870 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -8,63 +8,19 @@ EditedText { operations: [ OrderedOperation { order: 0, - operation: Insert { - index: 0, - text: [ - Token { - normalised: "Hello, ", - original: "Hello, ", - }, - Token { - normalised: "my ", - original: "my ", - }, - Token { - normalised: "friend! ", - original: "friend! ", - }, - ], - }, + operation: , }, OrderedOperation { order: 0, - operation: Delete { - index: 18, - deleted_character_count: 13, - deleted_text: Some( - "hello world! ", - ), - }, + operation: , }, OrderedOperation { order: 21, - operation: Insert { - index: 26, - text: [ - Token { - normalised: "you ", - original: "you ", - }, - Token { - normalised: "doing? ", - original: "doing? ", - }, - Token { - normalised: "Albert", - original: "Albert", - }, - ], - }, + operation: , }, OrderedOperation { order: 21, - operation: Delete { - index: 43, - deleted_character_count: 10, - deleted_text: Some( - "you? Adam", - ), - }, + operation: , }, ], } diff --git a/backend/reconcile/src/utils/find_common_overlap.rs b/backend/reconcile/src/utils/find_common_overlap.rs new file mode 100644 index 00000000..ef3fca93 --- /dev/null +++ b/backend/reconcile/src/utils/find_common_overlap.rs @@ -0,0 +1,66 @@ +use crate::Token; + +/// Given two lists of tokens, returns the offset in the first (old) list from which the two lists have the same tokens until the end of the first list. +/// Thus, the suffix of the old list from the offset to the end is equal to a prefix of the new list. +/// +/// If there is no overlap, the function returns the maxmium offset, the length of the old list. +/// +/// ## Example +/// ``` +/// old: [0, 1, 9, 0, 2, 5] +/// new: [9, 0, 2, 5, 1] +/// ``` +/// > results in an offset of 2 +pub fn find_common_overlap(old: &[Token], new: &[Token]) -> usize +where + T: PartialEq + Clone, +{ + let minimum_offset = old.len().saturating_sub(new.len()); + for offset in minimum_offset..old.len() { + if old.iter().skip(offset).zip(new.iter()).all(|(a, b)| a == b) { + return offset; + } + } + + old.len() +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_common_overlap() { + assert_eq!(find_common_overlap(&["".into()], &["".into()]), 0); + + assert_eq!( + find_common_overlap( + &["a".into(), "b".into(), "c".into()], + &["b".into(), "c".into(), "a".into()] + ), + 1 + ); + + assert_eq!( + find_common_overlap( + &["a".into(), "a".into(), "a".into()], + &["a".into(), "b".into(), "c".into()] + ), + 2 + ); + + assert_eq!( + find_common_overlap( + &["a".into(), "b".into(), "c".into()], + &["d".into(), "e".into(), "a".into()] + ), + 3 + ); + + assert_eq!( + find_common_overlap(&["a".into(), "a".into()], &["a".into()]), + 1 + ); + } +} diff --git a/backend/reconcile/src/utils/mod.rs b/backend/reconcile/src/utils/mod.rs index d8a272b2..ab4691bd 100644 --- a/backend/reconcile/src/utils/mod.rs +++ b/backend/reconcile/src/utils/mod.rs @@ -1,5 +1,6 @@ pub mod common_prefix_len; pub mod common_suffix_len; +pub mod find_common_overlap; pub mod merge_iters; pub mod ordered_operation; pub mod side;