From 44f9a44f63cce40ddedcaa5581b95a9c82002f5d Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Thu, 28 Nov 2024 20:51:05 +0000 Subject: [PATCH] Store tokens instead of text in insert --- backend/reconcile/src/diffs/myers.rs | 7 +-- backend/reconcile/src/diffs/raw_operation.rs | 5 +- .../operation_transformation/edited_text.rs | 43 +++++++------- .../operation_transformation/merge_context.rs | 30 ++++++++-- .../src/operation_transformation/mod.rs | 8 +-- .../src/operation_transformation/operation.rs | 56 +++++++++++++------ ...ted_text__tests__calculate_operations.snap | 30 +++++++++- backend/reconcile/src/tokenizer/mod.rs | 4 ++ backend/reconcile/src/tokenizer/token.rs | 26 ++++----- .../reconcile/src/utils/ordered_operation.rs | 9 ++- 10 files changed, 144 insertions(+), 74 deletions(-) diff --git a/backend/reconcile/src/diffs/myers.rs b/backend/reconcile/src/diffs/myers.rs index b9a6d81..7e0ce78 100644 --- a/backend/reconcile/src/diffs/myers.rs +++ b/backend/reconcile/src/diffs/myers.rs @@ -18,7 +18,6 @@ //! without making reasonable progress. //! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15). -use std::hash::Hash; use std::ops::{Index, IndexMut, Range}; use std::vec; @@ -36,7 +35,7 @@ use super::raw_operation::RawOperation; /// execution time permitted before it bails and falls back to an approximation. pub fn diff(old: &[Token], new: &[Token]) -> Vec> where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { let max_d = max_d(old.len(), new.len()); let mut vb = V::new(max_d); @@ -131,7 +130,7 @@ fn find_middle_snake( vb: &mut V, ) -> Option<(usize, usize)> where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { let n = old_range.len(); let m = new_range.len(); @@ -238,7 +237,7 @@ fn conquer( vb: &mut V, result: &mut Vec>, ) where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { // Check for common prefix let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); diff --git a/backend/reconcile/src/diffs/raw_operation.rs b/backend/reconcile/src/diffs/raw_operation.rs index ce59436..835412e 100644 --- a/backend/reconcile/src/diffs/raw_operation.rs +++ b/backend/reconcile/src/diffs/raw_operation.rs @@ -1,10 +1,9 @@ use crate::tokenizer::token::Token; -use std::hash::Hash; #[derive(Debug, Clone, PartialEq)] pub enum RawOperation where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { Insert(Vec>), Delete(Vec>), @@ -13,7 +12,7 @@ where impl RawOperation where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { pub fn tokens(&self) -> &Vec> { match self { diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index 45c0e03..9e44c99 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -2,13 +2,12 @@ use super::Operation; use crate::diffs::raw_operation::RawOperation; use crate::errors::SyncLibError; use crate::operation_transformation::merge_context::MergeContext; -use crate::tokenizer::token::Token; use crate::tokenizer::word_tokenizer::word_tokenizer; +use crate::tokenizer::Tokenizer; use crate::utils::ordered_operation::OrderedOperation; use crate::utils::side::Side; use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted}; use ropey::Rope; -use std::hash::Hash; use std::iter; #[cfg(feature = "serde")] @@ -22,13 +21,16 @@ use serde::{Deserialize, Serialize}; /// EditedText derived from the same original text and then applied to the original text /// to get the reconciled text of concurrent edits. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] -pub struct EditedText<'a> { +#[derive(Debug, Clone, PartialEq, Default)] +pub struct EditedText<'a, T> +where + T: PartialEq + Clone, +{ text: &'a str, - operations: Vec, + operations: Vec>, } -impl<'a> EditedText<'a> { +impl<'a> EditedText<'a, String> { /// Create an EditedText from the given original (old) and updated (new) strings. /// The returned EditedText represents the changes from the original to the updated text. /// When the return value is applied to the original text, it will result in the updated text. @@ -36,20 +38,21 @@ impl<'a> EditedText<'a> { pub fn from_strings(original: &'a str, updated: &str) -> Self { Self::from_strings_with_tokenizer(original, updated, &word_tokenizer) } +} +impl<'a, T> EditedText<'a, T> +where + T: PartialEq + Clone, +{ /// Create an EditedText from the given original (old) and updated (new) strings. /// The returned EditedText represents the changes from the original to the updated text. /// When the return value is applied to the original text, it will result in the updated text. /// The tokenizer function is used to tokenize the text. - pub fn from_strings_with_tokenizer( + pub fn from_strings_with_tokenizer( original: &'a str, updated: &str, - tokenizer: &F, - ) -> Self - where - F: Fn(&str) -> Vec>, - T: PartialEq + Hash + Clone, - { + tokenizer: &Tokenizer, + ) -> Self { let original_tokens = (tokenizer)(original); let updated_tokens = (tokenizer)(updated); @@ -63,10 +66,9 @@ impl<'a> EditedText<'a> { } // Turn raw operations into ordered operations while keeping track of old & new indexes. - fn cook_operations(raw_operations: I) -> impl Iterator + fn cook_operations(raw_operations: I) -> impl Iterator> where I: IntoIterator>, - T: PartialEq + Hash + Clone, { let mut new_index = 0; // this is the start index of the operation on the new text let mut order = 0; // this is the start index of the operation on the original text @@ -81,8 +83,8 @@ impl<'a> EditedText<'a> { None } - RawOperation::Insert(..) => { - let op = Operation::create_insert(new_index, raw_operation.get_original_text()) + RawOperation::Insert(tokens) => { + let op = Operation::create_insert(new_index, tokens) .map(|operation| OrderedOperation { order, operation }); new_index += length; @@ -110,10 +112,9 @@ impl<'a> EditedText<'a> { }) } - fn elongate_operations(raw_operations: I) -> Vec> + fn elongate_operations(raw_operations: I) -> Vec> where I: IntoIterator>, - T: PartialEq + Hash + Clone, { let mut maybe_previous_insert: Option> = None; let mut maybe_previous_delete: Option> = None; @@ -164,7 +165,7 @@ impl<'a> EditedText<'a> { /// Create a new EditedText with the given operations. /// The operations must be in the order in which they are meant to be applied. /// The operations must not overlap. - fn new(text: &'a str, operations: Vec) -> Self { + fn new(text: &'a str, operations: Vec>) -> Self { operations .iter() .zip(operations.iter().skip(1)) @@ -271,7 +272,7 @@ mod tests { #[test] fn test_calculate_operations_with_insert() { let original = "hello world! ..."; - let left = "hello world! I'm Andras."; + let left = "Hello world! I'm Andras."; let right = "Hello world! How are you?"; let expected = "Hello world! I'm Andras.How are you?"; diff --git a/backend/reconcile/src/operation_transformation/merge_context.rs b/backend/reconcile/src/operation_transformation/merge_context.rs index e9a81c0..a08d02d 100644 --- a/backend/reconcile/src/operation_transformation/merge_context.rs +++ b/backend/reconcile/src/operation_transformation/merge_context.rs @@ -1,15 +1,33 @@ use crate::operation_transformation::Operation; -#[derive(Debug, Clone, Default)] -pub struct MergeContext { - pub last_delete: Option, +#[derive(Debug, Clone)] +pub struct MergeContext +where + T: PartialEq + Clone, +{ + pub last_delete: Option>, pub shift: i64, } -impl MergeContext { +impl Default for MergeContext +where + T: PartialEq + Clone, +{ + fn default() -> Self { + MergeContext { + last_delete: None, + shift: 0, + } + } +} + +impl MergeContext +where + T: PartialEq + Clone, +{ /// Replace the last delete operation (if there was one) with a new one while /// applying it to the shift. - pub fn replace_delete(&mut self, delete: Option) { + pub fn replace_delete(&mut self, delete: Option>) { if let Some(produced_last_delete) = self.last_delete.take() { self.shift -= produced_last_delete.len() as i64; } @@ -19,7 +37,7 @@ impl MergeContext { /// Remove the last delete operation (if there was one) in case it is behind the /// threshold operation. - pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation) { + pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation) { match self.last_delete.as_ref() { Some(last_delete) if threshold_operation.start_index() as i64 + self.shift diff --git a/backend/reconcile/src/operation_transformation/mod.rs b/backend/reconcile/src/operation_transformation/mod.rs index fdf2a32..3d9d239 100644 --- a/backend/reconcile/src/operation_transformation/mod.rs +++ b/backend/reconcile/src/operation_transformation/mod.rs @@ -4,9 +4,8 @@ mod operation; pub use edited_text::EditedText; pub use operation::Operation; -use std::hash::Hash; -use crate::{errors::SyncLibError, tokenizer::token::Token}; +use crate::{errors::SyncLibError, tokenizer::Tokenizer}; pub fn reconcile(original: &str, left: &str, right: &str) -> Result { let left_operations = EditedText::from_strings(original, left); @@ -20,11 +19,10 @@ pub fn reconcile_with_tokenizer( original: &str, left: &str, right: &str, - tokenizer: &F, + tokenizer: &Tokenizer, ) -> Result where - F: Fn(&str) -> Vec>, - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer); let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer); diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index 8874a45..a3751b0 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -1,7 +1,7 @@ use ropey::Rope; use std::fmt::Display; -use crate::errors::SyncLibError; +use crate::{errors::SyncLibError, Token}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -12,11 +12,14 @@ use super::merge_context::MergeContext; /// Operation is tied to a ropey::Rope and is mainly expected to be /// created by EditedText. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Operation { +#[derive(Debug, Clone, PartialEq)] +pub enum Operation +where + T: PartialEq + Clone, +{ Insert { index: usize, - text: String, + text: Vec>, }, Delete { @@ -28,10 +31,13 @@ pub enum Operation { }, } -impl Operation { +impl Operation +where + T: PartialEq + Clone, +{ /// Creates an insert operation with the given index and text. /// If the text is empty (meaning that the operation would be a no-op), returns None. - pub fn create_insert(index: usize, text: String) -> Option { + pub fn create_insert(index: usize, text: Vec>) -> Option { if text.is_empty() { return None; } @@ -82,7 +88,13 @@ impl Operation { pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> { match self { Operation::Insert { text, .. } => rope_text - .try_insert(self.start_index(), text) + .try_insert( + self.start_index(), + &text + .iter() + .map(|token| token.original()) + .collect::(), + ) .map_err(|err| { SyncLibError::OperationApplicationError(format!( "Failed to insert text: {}", @@ -141,7 +153,9 @@ impl Operation { /// Returns the number of affected characters. It is always greater than 0 because empty operations cannot be created. pub fn len(&self) -> usize { match self { - Operation::Insert { text, .. } => text.chars().count(), + Operation::Insert { text, .. } => { + text.iter().map(|token| token.get_original_length()).sum() + } Operation::Delete { deleted_character_count, .. @@ -187,9 +201,9 @@ impl Operation { /// The contexts are updated in-place. pub fn merge_operations_with_context( self, - affecting_context: &mut MergeContext, - produced_context: &mut MergeContext, - ) -> Option { + affecting_context: &mut MergeContext, + produced_context: &mut MergeContext, + ) -> Option> { affecting_context.consume_delete_if_behind_operation(&self); let operation = self.with_shifted_index(affecting_context.shift); @@ -253,11 +267,21 @@ impl Operation { } } -impl Display for Operation { +impl Display for Operation +where + T: PartialEq + Clone, +{ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Operation::Insert { index, text } => { - write!(f, "", text, index) + write!( + f, + "", + text.iter() + .map(|token| token.original()) + .collect::(), + index + ) } Operation::Delete { index, @@ -293,7 +317,7 @@ mod tests { #[test] #[should_panic] fn test_shifting_error() { - insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string()) + insta::assert_debug_snapshot!(Operation::create_insert(1, vec!["hi".into()]) .unwrap() .with_shifted_index(-2)); } @@ -301,7 +325,7 @@ mod tests { #[test] fn test_apply_delete_with_create() -> Result<(), SyncLibError> { let mut rope = Rope::from_str("hello world"); - let operation = Operation::create_delete_with_text(5, " world".to_string()).unwrap(); + let operation = Operation::<()>::create_delete_with_text(5, " world".to_string()).unwrap(); operation.apply(&mut rope)?; @@ -313,7 +337,7 @@ mod tests { #[test] fn test_apply_insert() -> Result<(), SyncLibError> { let mut rope = Rope::from_str("hello"); - let operation = Operation::create_insert(5, " my friend".to_string()).unwrap(); + let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap(); operation.apply(&mut rope)?; diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index 065ee2e..99312c6 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -10,7 +10,20 @@ EditedText { order: 0, operation: Insert { index: 0, - text: "Hello, my friend! ", + text: [ + Token { + normalised: "Hello, ", + original: "Hello, ", + }, + Token { + normalised: "my ", + original: "my ", + }, + Token { + normalised: "friend! ", + original: "friend! ", + }, + ], }, }, OrderedOperation { @@ -27,7 +40,20 @@ EditedText { order: 21, operation: Insert { index: 26, - text: "you doing? Albert", + text: [ + Token { + normalised: "you ", + original: "you ", + }, + Token { + normalised: "doing? ", + original: "doing? ", + }, + Token { + normalised: "Albert", + original: "Albert", + }, + ], }, }, OrderedOperation { diff --git a/backend/reconcile/src/tokenizer/mod.rs b/backend/reconcile/src/tokenizer/mod.rs index 6a3b8b4..7ce6463 100644 --- a/backend/reconcile/src/tokenizer/mod.rs +++ b/backend/reconcile/src/tokenizer/mod.rs @@ -1,2 +1,6 @@ +use token::Token; + pub mod token; pub mod word_tokenizer; + +pub type Tokenizer = dyn Fn(&str) -> Vec>; diff --git a/backend/reconcile/src/tokenizer/token.rs b/backend/reconcile/src/tokenizer/token.rs index 1c998ce..a89d059 100644 --- a/backend/reconcile/src/tokenizer/token.rs +++ b/backend/reconcile/src/tokenizer/token.rs @@ -1,17 +1,24 @@ -use std::hash::Hash; - #[derive(Debug, Clone)] pub struct Token where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { normalised: T, original: String, } +impl From<&str> for Token { + fn from(s: &str) -> Self { + Token { + normalised: s.to_string(), + original: s.to_string(), + } + } +} + impl Token where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { pub fn new(normalised: T, original: String) -> Self { Token { @@ -35,18 +42,9 @@ where impl PartialEq for Token where - T: PartialEq + Hash + Clone, + T: PartialEq + Clone, { fn eq(&self, other: &Self) -> bool { self.normalised == other.normalised } } - -impl Hash for Token -where - T: PartialEq + Hash + Clone, -{ - fn hash(&self, state: &mut H) { - self.normalised.hash(state); - } -} diff --git a/backend/reconcile/src/utils/ordered_operation.rs b/backend/reconcile/src/utils/ordered_operation.rs index 6cac730..17229d2 100644 --- a/backend/reconcile/src/utils/ordered_operation.rs +++ b/backend/reconcile/src/utils/ordered_operation.rs @@ -4,8 +4,11 @@ use serde::{Deserialize, Serialize}; use crate::operation_transformation::Operation; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct OrderedOperation { +#[derive(Debug, Clone, PartialEq)] +pub struct OrderedOperation +where + T: PartialEq + Clone, +{ pub order: usize, - pub operation: Operation, + pub operation: Operation, }