diff --git a/backend/reconcile/Cargo.toml b/backend/reconcile/Cargo.toml index 9bcee51f..29e3dcbf 100644 --- a/backend/reconcile/Cargo.toml +++ b/backend/reconcile/Cargo.toml @@ -7,6 +7,9 @@ edition = "2021" ropey = { version = "1.6.1", default-features = false, features = ["simd"] } # thiserror = {workspace = true} log = {workspace = true} +itertools = "0.13.0" + +# optional dependencies serde = { version = "1.0.215", optional = true } [features] @@ -14,5 +17,4 @@ serde = [ "dep:serde" ] [dev-dependencies] insta = "1.41.1" -itertools = "0.13.0" pretty_assertions = "1.4.1" diff --git a/backend/reconcile/src/operations/operation.rs b/backend/reconcile/src/operations/operation.rs index 0d97086a..a301cad3 100644 --- a/backend/reconcile/src/operations/operation.rs +++ b/backend/reconcile/src/operations/operation.rs @@ -122,6 +122,11 @@ impl Operation { self.start_index() + self.len() - 1 } + /// Returns the range of indices of characters that the operation affects, inclusive. + pub fn range(&self) -> std::ops::RangeInclusive { + self.start_index()..=self.end_index() + } + /// Returns the number of affected characters. It is always greater than 0 because empty operations cannot be created. pub fn len(&self) -> usize { match self { @@ -139,11 +144,6 @@ impl Operation { false } - /// Returns the range of indices of characters that the operation affects, inclusive. - pub fn range(&self) -> std::ops::RangeInclusive { - self.start_index()..=self.end_index() - } - /// Clones the operation while updating the index. pub fn with_index(&self, index: usize) -> Self { match self { @@ -180,70 +180,6 @@ impl Operation { Ok(self.with_index(non_negative_index)) } - - /// Merges the operation with another operation that is consequtive to this operation. - /// The other operation must start where this operation ends. - /// The two operations must be of the same type, otherwise panics. - pub fn merge(self, other: &Self) -> Self { - match (self, other) { - ( - Operation::Insert { index, text }, - Operation::Insert { - text: other_text, .. - }, - ) => { - let end_index = index + text.chars().count(); - debug_assert!( - end_index == other.start_index(), - "Cannot merge non-consequtive inserts with index {} and {}", - end_index, - other.start_index() - ); - - Operation::Insert { - index, - text: text + other_text, - } - } - ( - Operation::Delete { - index, - deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text, - }, - Operation::Delete { - index: other_index, - deleted_character_count: other_deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text: other_deleted_text, - }, - ) => { - debug_assert!( - index == *other_index, - "Cannot merge non-consequtive deletes", - ); - - Operation::Delete { - index, - deleted_character_count: deleted_character_count - + other_deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text: deleted_text - .into_iter() - .flat_map(|t1| other_deleted_text.as_ref().map(|t2| t1 + t2).into_iter()) - .last(), - } - } - (this, other) => panic!( - "Cannot merge operations of different type: {:?} and {:?}", - &this, &other - ), - } - } } impl Display for Operation { diff --git a/backend/reconcile/src/operations/operation_sequence.rs b/backend/reconcile/src/operations/operation_sequence.rs index 9170cedc..41d04697 100644 --- a/backend/reconcile/src/operations/operation_sequence.rs +++ b/backend/reconcile/src/operations/operation_sequence.rs @@ -1,10 +1,11 @@ -use std::cmp::Ordering; +use std::{cmp::Ordering, result, vec}; use super::Operation; use crate::diffs::myers::diff; use crate::diffs::raw_operation::RawOperation; use crate::errors::SyncLibError; use crate::tokenizer::token::Token; +use itertools::Itertools; use ropey::Rope; #[cfg(feature = "serde")] @@ -26,27 +27,37 @@ struct MergeContext { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] pub struct OperationSequence { - operations: Vec, + operations: Vec<(usize, Operation)>, } impl OperationSequence { /// Creates a new OperationSequence with the given operations. /// The operations should be in the order they should be applied. /// The operations must not overlap. - pub fn new(operations: Vec) -> Self { - operations - .iter() - .zip(operations.iter().skip(1)) - .for_each(|(previous, next)| { + pub fn new(operations: Vec<(usize, Operation)>) -> Self { + operations.iter().zip(operations.iter().skip(1)).for_each( + |((i_prev, previous), (i_next, next))| { debug_assert!( - previous.start_index() <= next.start_index(), - "{} doesn't come before {}", + i_prev == i_next + || i_prev + previous.len() <= *i_next + || !(matches!(previous, Operation::Delete { .. }) + && matches!(next, Operation::Insert { .. })), + "{} and {} overlap with old index {i_prev} and {i_next}", previous, next ); - }); + debug_assert!( + previous.start_index() <= next.start_index(), + "{} must not come before {} yet it does", + previous, + next + ); + }, + ); - Self { operations } + Self { + operations, // operations: Self::merge_subsequent_operations(operations), + } } /// Creates an OperationSequence from the given original (old) and updated (new) strings. @@ -58,37 +69,55 @@ impl OperationSequence { let diff: Vec = diff(&original_tokens, &updated_tokens); - Self::new(Self::raw_operations_to_operations(diff)) + Self::new(Self::cook_operations(diff)) } - fn raw_operations_to_operations(raw_operations: Vec) -> Vec { - let mut index = 0; + fn cook_operations(raw_operations: Vec) -> Vec<(usize, Operation)> { + let mut new_index = 0; + let mut old_index = 0; + raw_operations .into_iter() .flat_map(|raw_operation| { - match raw_operation { + let length = raw_operation.original_text_length(); + + let operation = match raw_operation { RawOperation::Equal(..) => { - index += raw_operation.original_text_length(); + new_index += length; + old_index += length; + None } RawOperation::Insert(..) => { - let length = raw_operation.original_text_length(); - let result = - Operation::create_insert(index, raw_operation.get_original_text()); - index += length; - result + let op = + Operation::create_insert(new_index, raw_operation.get_original_text()) + .map(|op| (old_index, op)); + + new_index += length; + + op } RawOperation::Delete(..) => { - Operation::create_delete_with_text(index, raw_operation.get_original_text()) + let op = Operation::create_delete_with_text( + new_index, + raw_operation.get_original_text(), + ) + .map(|op| (old_index, op)); + + old_index += length; + + op } - } - .into_iter() + }; + + operation.into_iter() }) + .sorted_by_key(|(order, _)| *order) .collect() } pub fn merge(&self, other: &Self) -> Result { - let mut merged_operations = + let mut merged_operations: Vec = Vec::with_capacity(self.operations.len() + other.operations.len()); let mut left_merge_context = MergeContext::default(); @@ -98,81 +127,55 @@ impl OperationSequence { let mut right_index: usize = 0; loop { - let shifted_left_op = self - .operations - .get(left_index) - .map(|op| { - Self::pick_up_dangling_delete_from_affecting_context( - op, - &mut right_merge_context, - ); - op.with_shifted_index(right_merge_context.shift) - }) - .transpose()?; + let left_op = self.operations.get(left_index); + let right_op = other.operations.get(right_index); - let shifted_right_op = other - .operations - .get(right_index) - .map(|op| { - Self::pick_up_dangling_delete_from_affecting_context( - op, - &mut left_merge_context, - ); - op.with_shifted_index(left_merge_context.shift) - }) - .transpose()?; + let order = left_op + .map(|(order, _)| order) + .cmp(&right_op.map(|(order, _)| order)); - let left_op_index = shifted_left_op - .as_ref() - .map(|op| { - op.start_index().max( - left_merge_context - .last_delete - .as_ref() - .map(|op| op.end_index()) - .unwrap_or_default(), - ) as i64 - }) - .unwrap_or_default(); + println!("left_op: {:#?} <> right_op: {:#?}", left_op, right_op); - let right_op_index = shifted_right_op - .as_ref() - .map(|op| { - op.start_index().max( - right_merge_context - .last_delete - .as_ref() - .map(|op| op.end_index()) - .unwrap_or_default(), - ) as i64 - }) - .unwrap_or_default(); + let left_op = left_op.map(|(_, op)| op); + let right_op = right_op.map(|(_, op)| op); - let result = left_op_index.cmp(&right_op_index); - let order = if result == Ordering::Equal - && shifted_left_op.is_some() - && shifted_right_op.is_some() - { - match ( - shifted_left_op.as_ref().unwrap(), - shifted_right_op.as_ref().unwrap(), - ) { - (Operation::Insert { .. }, Operation::Delete { .. }) => Ordering::Greater, - (Operation::Delete { .. }, Operation::Insert { .. }) => Ordering::Less, - _ => Ordering::Equal, - } - } else { - result - }; + // let order = if order == Ordering::Equal { + // match (left_op.as_ref(), right_op.as_ref()) { + // (Some(Operation::Insert { .. }), Some(Operation::Delete { .. })) => { + // Ordering::Greater + // } + // (Some(Operation::Delete { .. }), Some(Operation::Insert { .. })) => { + // Ordering::Less + // } + // _ => Ordering::Equal, + // } + // } else { + // order + // }; - match (shifted_left_op, shifted_right_op, order) { + // debug_assert!( + // right_merge_context.last_delete.is_none() + // || left_merge_context.last_delete.is_none(), + // "Both contexts have a last delete" + // ); + + match (left_op, right_op, order) { (Some(left_op), None, _) | (Some(left_op), Some(_), std::cmp::Ordering::Less | std::cmp::Ordering::Equal) => { + Self::pick_up_dangling_delete_from_affecting_context( + left_op.start_index(), + &mut right_merge_context, + ); + if let Some(op) = Self::merge_operations_with_context( - left_op, + left_op.with_shifted_index(right_merge_context.shift)?, &mut right_merge_context, &mut left_merge_context, )? { + // println!("merged {:#?}", &op); + if let Some(last) = merged_operations.last() { + debug_assert!(op.start_index() >= last.start_index()); + } merged_operations.push(op); } @@ -180,11 +183,20 @@ impl OperationSequence { } (None, Some(right_op), _) | (Some(_), Some(right_op), std::cmp::Ordering::Greater) => { + Self::pick_up_dangling_delete_from_affecting_context( + right_op.start_index(), + &mut left_merge_context, + ); + if let Some(op) = Self::merge_operations_with_context( - right_op, + right_op.with_shifted_index(left_merge_context.shift)?, &mut left_merge_context, &mut right_merge_context, )? { + // println!("merged {:#?}", &op); + if let Some(last) = merged_operations.last() { + debug_assert!(op.start_index() >= last.start_index()); + } merged_operations.push(op); } @@ -194,13 +206,22 @@ impl OperationSequence { break; } }; + + println!( + "{:#?} <> {:#?}\n\n\n", + left_merge_context, right_merge_context + ); } - Ok(Self::new(merged_operations)) + println!("merged_operations: {:#?}", merged_operations.to_vec()); + + Ok(Self::new( + merged_operations.into_iter().map(|op| (0, op)).collect(), + )) } pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> { - for operation in &self.operations { + for (_, operation) in &self.operations { operation.apply(rope_text)?; } @@ -230,46 +251,52 @@ impl OperationSequence { (operation @ Operation::Insert { .. }, Some(last_delete)) => { produced_context.shift += operation.len() as i64; - if last_delete.range().contains(&operation.start_index()) { - let moved_operation = operation.with_index(last_delete.start_index()); + debug_assert!( + last_delete.range().contains(&operation.start_index()), + "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" + ); - affecting_context.last_delete = Operation::create_delete( - moved_operation.end_index() + 1, - last_delete.len(), - ); + let difference = + operation.start_index() as i64 - last_delete.start_index() as i64; - Some(moved_operation) - } else { - Some(operation) - } + let moved_operation = operation.with_index(last_delete.start_index()); + + affecting_context.last_delete = Operation::create_delete( + moved_operation.end_index() + 1, + (last_delete.len() as i64 - difference) as usize, + ); + affecting_context.shift -= difference; + + Some(moved_operation) } (operation @ Operation::Delete { .. }, Some(last_delete)) => { - let updated_delete = if last_delete.range().contains(&operation.start_index()) { - let overlap = - last_delete.end_index() as i64 - operation.start_index() as i64 + 1; + debug_assert!( + last_delete.range().contains(&operation.start_index()), + "There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it" + ); - affecting_context.last_delete = Operation::create_delete( - last_delete.start_index(), - 0.max(last_delete.len() as i64 - operation.len() as i64) as usize, - ); + let difference = + operation.start_index() as i64 - last_delete.start_index() as i64; - if last_delete.end_index() < operation.end_index() { - affecting_context.shift -= last_delete.len() as i64 - overlap - } + let updated_delete = Operation::create_delete( + last_delete.start_index(), + 0.max(operation.end_index() as i64 - last_delete.end_index() as i64) + as usize, + ); - Operation::create_delete( - last_delete.start_index(), - 0.max(operation.len() as i64 - overlap) as usize, - ) - } else { - Some(operation) - }; + affecting_context.shift -= difference; + affecting_context.last_delete = Operation::create_delete( + last_delete.start_index(), + 0.max(last_delete.end_index() as i64 - operation.end_index() as i64) + as usize, + ); Self::replace_delete_in_produced_context( produced_context, updated_delete.clone(), ); + updated_delete } }, @@ -288,12 +315,12 @@ impl OperationSequence { } fn pick_up_dangling_delete_from_affecting_context( - next_operation: &Operation, + start_index: usize, affecting_context: &mut MergeContext, ) { match affecting_context.last_delete.as_ref() { Some(last_delete) - if next_operation.start_index() as i64 + affecting_context.shift + if start_index as i64 + affecting_context.shift > last_delete.end_index() as i64 => { affecting_context.shift -= last_delete.len() as i64; @@ -328,16 +355,6 @@ mod tests { assert_eq!(new_right.to_string(), right); } - #[test] - fn test_calculate_operations_with_large_diff() { - let left = "hello world! How are you? Adam"; - let right = "Hello, my friend! How are you doing? Albert"; - - let result = OperationSequence::from_strings(left, right); - - insta::assert_debug_snapshot!(result); - } - #[test] fn test_calculate_operations_with_no_diff() { let left = "hello world!"; @@ -445,7 +462,7 @@ mod tests { #[test] - fn test_merge_files_without_panicing() { + fn test_merge_files_without_panic() { let files = vec![ "pride_and_prejudice.txt", "romeo_and_juliet.txt", @@ -474,31 +491,31 @@ mod tests { } fn test_merge(original: &str, edit_1: &str, edit_2: &str) -> String { - println!( - "original: '{:#}'", - original[..100.min(original.len())].to_string() - ); - println!( - "edit_1: '{:#}'", - edit_1[..100.min(edit_1.len())].to_string() - ); - println!( - "edit_2: '{:#}'", - edit_2[..100.min(edit_2.len())].to_string() - ); + // println!( + // "original: '{:#}'", + // original[..100.min(original.len())].to_string() + // ); + // println!( + // "edit_1: '{:#}'", + // edit_1[..100.min(edit_1.len())].to_string() + // ); + // println!( + // "edit_2: '{:#}'", + // edit_2[..100.min(edit_2.len())].to_string() + // ); let mut original = Rope::from_str(original); let operations_1 = OperationSequence::from_strings(&original.to_string(), edit_1); - println!( - "operations_1: {:?}", - operations_1.operations[..20.min(operations_1.operations.len())].to_vec() - ); + // println!( + // "operations_1: {:#?}", + // operations_1.operations[..20.min(operations_1.operations.len())].to_vec() + // ); let operations_2 = OperationSequence::from_strings(&original.to_string(), edit_2); - println!( - "operations_2: {:?}", - operations_2.operations[..20.min(operations_2.operations.len())].to_vec() - ); + // println!( + // "operations_2: {:#?}", + // operations_2.operations[..20.min(operations_2.operations.len())].to_vec() + // ); assert_eq!( operations_1 diff --git a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap b/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap index a6305408..3b5eef94 100644 --- a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap +++ b/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations.snap @@ -5,38 +5,56 @@ snapshot_kind: text --- OperationSequence { operations: [ - Insert { - index: 0, - text: "Hello, my friend! ", - }, - Delete { - index: 18, - deleted_character_count: 13, - deleted_text: Some( - "hello world! ", - ), - }, - Delete { - index: 26, - deleted_character_count: 5, - deleted_text: Some( - "you? ", - ), - }, - Delete { - index: 26, - deleted_character_count: 5, - deleted_text: Some( - " Adam", - ), - }, - Insert { - index: 26, - text: "you ", - }, - Insert { - index: 30, - text: "doing? Albert", - }, + ( + 0, + Insert { + index: 0, + text: "Hello, my friend! ", + }, + ), + ( + 0, + Delete { + index: 18, + deleted_character_count: 13, + deleted_text: Some( + "hello world! ", + ), + }, + ), + ( + 21, + Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + "you? ", + ), + }, + ), + ( + 26, + Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + " Adam", + ), + }, + ), + ( + 31, + Insert { + index: 26, + text: "you ", + }, + ), + ( + 31, + Insert { + index: 30, + text: "doing? Albert", + }, + ), ], } diff --git a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap b/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap index f116c99d..810d3413 100644 --- a/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap +++ b/backend/reconcile/src/operations/snapshots/reconcile__operations__operation_sequence__tests__calculate_operations_with_large_diff.snap @@ -5,38 +5,56 @@ snapshot_kind: text --- OperationSequence { operations: [ - Insert { - index: 0, - text: "Hello, my friend! ", - }, - Delete { - index: 18, - deleted_character_count: 13, - deleted_text: Some( - "hello world! ", - ), - }, - Delete { - index: 26, - deleted_character_count: 5, - deleted_text: Some( - "you? ", - ), - }, - Delete { - index: 26, - deleted_character_count: 5, - deleted_text: Some( - " Adam", - ), - }, - Insert { - index: 26, - text: "you ", - }, - Insert { - index: 30, - text: "doing? Albert", - }, + ( + 0, + Insert { + index: 0, + text: "Hello, my friend! ", + }, + ), + ( + 0, + Delete { + index: 18, + deleted_character_count: 13, + deleted_text: Some( + "hello world! ", + ), + }, + ), + ( + 21, + Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + "you? ", + ), + }, + ), + ( + 26, + Delete { + index: 26, + deleted_character_count: 5, + deleted_text: Some( + " Adam", + ), + }, + ), + ( + 31, + Insert { + index: 26, + text: "you ", + }, + ), + ( + 31, + Insert { + index: 30, + text: "doing? Albert", + }, + ), ], }