This commit is contained in:
Andras Schmelczer 2024-11-24 17:47:48 +00:00
parent c02f84a476
commit 143883a899
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
12 changed files with 801 additions and 645 deletions

View file

@ -0,0 +1,279 @@
use std::borrow::BorrowMut;
use super::{operation, Operation};
use crate::diffs::raw_operation::RawOperation;
use crate::errors::SyncLibError;
use crate::operation_transformation::merge_context::MergeContext;
use crate::tokenizer::token::Token;
use crate::utils::ordered_operation::OrderedOperation;
use crate::utils::side::Side;
use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted};
use ropey::Rope;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A sequence of operations that can be applied to a text document.
/// EditedText supports merging two sequences of operations using the
/// principle of Operational Transformation.
///
/// It's mainly created through the from_strings method, then merged with another
/// EditedText derived from the same original text and then applied to the original text
/// to get the reconciled text of concurrent edits.
///
/// The struct only borrows the original text (lifetime `'a`); it does not own it.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct EditedText<'a> {
/// The original text the operations are applied to.
text: &'a str,
/// The edits, each paired with the `order` key that `merge` interleaves two sides by.
operations: Vec<OrderedOperation>,
}
impl<'a> EditedText<'a> {
/// Build an EditedText describing how `original` was turned into `updated`.
///
/// Both strings are tokenized, the token sequences are diffed, and the raw diff is
/// converted into ordered operations in which adjacent operations of the same kind
/// are combined. Applying the returned value to the original text results in the
/// updated text.
pub fn from_strings(original: &'a str, updated: &str) -> Self {
    let tokens_before = Token::tokenize(original);
    let tokens_after = Token::tokenize(updated);

    let raw_operations: Vec<RawOperation> = diff(&tokens_before, &tokens_after);
    let cooked_operations = Self::cook_operations(raw_operations);
    let elongated_operations = Self::elongate_operations(cooked_operations);

    Self::new(original, elongated_operations)
}
// Convert raw diff operations into ordered operations, tracking the running
// position in both the updated (new) text and the original (old) text.
fn cook_operations(raw_operations: Vec<RawOperation>) -> Vec<OrderedOperation> {
    let mut new_index = 0; // start of the current operation in the new text
    let mut order = 0; // start of the current operation in the original text

    raw_operations
        .into_iter()
        .filter_map(|raw_operation| {
            let length = raw_operation.original_text_length();

            match raw_operation {
                // Unchanged text yields no operation; it only advances both cursors.
                RawOperation::Equal(..) => {
                    new_index += length;
                    order += length;
                    None
                }
                // Inserted text only exists in the new text, so only that cursor moves.
                RawOperation::Insert(..) => {
                    let insert =
                        Operation::create_insert(new_index, raw_operation.get_original_text());
                    new_index += length;
                    insert.map(|operation| OrderedOperation { order, operation })
                }
                // Deleted text only exists in the original text. The operation is tagged
                // with the current `order` before that cursor is advanced.
                RawOperation::Delete(..) => {
                    let delete = if cfg!(debug_assertions) {
                        Operation::create_delete_with_text(
                            new_index,
                            raw_operation.get_original_text(),
                        )
                    } else {
                        Operation::create_delete(new_index, length)
                    };
                    let ordered_delete =
                        delete.map(|operation| OrderedOperation { order, operation });
                    order += length;
                    ordered_delete
                }
            }
        })
        .collect()
}
// TODO: shift ops before compacting
/// Combine runs of adjacent operations of the same kind into single, longer operations.
///
/// Two consecutive inserts are combined when the second one starts right after the
/// first one ends; two consecutive deletes are combined when they start at the same
/// index. A combined operation keeps the `order` of the first operation of its run.
/// All other operations are passed through unchanged and in their original order.
fn elongate_operations(operations: Vec<OrderedOperation>) -> Vec<OrderedOperation> {
    let mut result: Vec<OrderedOperation> = Vec::with_capacity(operations.len());
    // The operation that may still be extended by the ones following it.
    let mut maybe_previous: Option<OrderedOperation> = None;

    for next in operations {
        let pending = match maybe_previous.take() {
            // First operation: nothing to combine with yet. `next` is owned, so it is
            // moved rather than cloned.
            None => next,
            Some(previous) => {
                let is_continuation = match (&previous.operation, &next.operation) {
                    (Operation::Insert { .. }, Operation::Insert { .. }) => {
                        previous.operation.end_index() + 1 == next.operation.start_index()
                    }
                    (Operation::Delete { .. }, Operation::Delete { .. }) => {
                        previous.operation.start_index() == next.operation.start_index()
                    }
                    _ => false,
                };

                if is_continuation {
                    OrderedOperation {
                        order: previous.order,
                        operation: previous.operation.extend(&next.operation),
                    }
                } else {
                    // `previous` cannot grow any further; emit it and start a new run.
                    result.push(previous);
                    next
                }
            }
        };
        maybe_previous = Some(pending);
    }

    // Flush the last pending operation, if there was any input at all.
    result.extend(maybe_previous);
    result
}
/// Construct an EditedText from a source text and the operations to run on it.
///
/// The operations are expected to be sorted in the order they are applied in and
/// must not overlap.
///
/// # Panics
///
/// In debug builds, panics if an operation starts before the one preceding it.
fn new(text: &'a str, operations: Vec<OrderedOperation>) -> Self {
    for pair in operations.windows(2) {
        let (previous, next) = (&pair[0], &pair[1]);
        debug_assert!(
            previous.operation.start_index() <= next.operation.start_index(),
            "{} must not come before {} yet it does",
            previous.operation,
            next.operation
        );
    }

    Self { text, operations }
}
pub fn merge(self, other: Self) -> Self {
debug_assert_eq!(
self.text, other.text,
"EditedTexts must be derived from the same text to be mergable"
);
let mut left_merge_context = MergeContext::default();
let mut right_merge_context = MergeContext::default();
Self::new(
self.text,
self.operations
.into_iter()
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| operation.order,
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
match side {
Side::Left => operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
),
Side::Right => operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
),
}
.map(|operation| OrderedOperation { order, operation })
.into_iter()
})
.collect(),
)
}
/// Run every operation, in order, against a copy of the original text and return
/// the resulting text.
///
/// # Errors
///
/// Propagates the first SyncLibError returned by an operation that cannot be applied.
pub fn apply(&self) -> Result<String, SyncLibError> {
    let mut rope = Rope::from_str(self.text);

    for OrderedOperation { operation, .. } in &self.operations {
        operation.apply(&mut rope)?;
    }

    Ok(rope.to_string())
}
}
#[cfg(test)]
mod tests {
use std::{env, fs, ops::Range, path::Path};
use pretty_assertions::assert_eq;
use test_case::test_matrix;
use super::*;
// Diffing two strings must produce the operations stored in the snapshot, and
// applying them to the original must reproduce the updated string.
#[test]
fn test_calculate_operations() {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right);
insta::assert_debug_snapshot!(operations);
let new_right = operations.apply().unwrap();
assert_eq!(new_right.to_string(), right);
}
// Identical inputs must produce no operations, and applying them is a no-op.
#[test]
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text);
assert_eq!(operations.operations.len(), 0);
let new_right = operations.apply().unwrap();
assert_eq!(new_right.to_string(), text);
}
// Two concurrent edits of the same original are merged; the expected text
// contains the changes of both sides.
#[test]
fn test_calculate_operations_with_insert() {
let original = "hello world! ...";
let left = "Hello world! How are you?";
let right = "hello world! I'm Andras.";
let expected = "Hello world! I'm Andras. How are you?";
let operations_1 = EditedText::from_strings(original, left);
// Debug output; only shown by the test harness when the test fails or runs with --nocapture.
println!("{:#?}", operations_1);
let operations_2 = EditedText::from_strings(original, right);
println!("{:#?}", operations_2);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply().unwrap(), expected);
}
}

View file

@ -0,0 +1,34 @@
use crate::operation_transformation::{operation, Operation};
/// Bookkeeping carried along while operations are being merged.
#[derive(Debug, Clone, Default)]
pub struct MergeContext {
/// The most recently recorded delete, as long as it has not been replaced or consumed.
pub last_delete: Option<Operation>,
/// Running signed offset. The methods of this type subtract the length of a
/// delete from it when that delete is replaced or consumed.
pub shift: i64,
}
impl MergeContext {
/// Store `delete` as the new last delete.
///
/// If a delete was already stored, its length is subtracted from the shift
/// before it is discarded.
pub fn replace_delete(&mut self, delete: Option<Operation>) {
    let replaced_length = self
        .last_delete
        .take()
        .map_or(0, |replaced_delete| replaced_delete.len() as i64);

    self.shift -= replaced_length;
    self.last_delete = delete;
}
/// Drop the stored delete once `threshold_operation` lies past it.
///
/// The threshold's start index is shifted by the current shift; when that position
/// is greater than the end index of the stored delete, the delete's length is
/// subtracted from the shift and the delete is cleared. Otherwise nothing changes.
pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation) {
    if let Some(last_delete) = self.last_delete.as_ref() {
        let shifted_start = threshold_operation.start_index() as i64 + self.shift;

        if shifted_start > last_delete.end_index() as i64 {
            self.shift -= last_delete.len() as i64;
            self.last_delete = None;
        }
    }
}
}

View file

@ -0,0 +1,169 @@
mod edited_text;
mod merge_context;
mod operation;
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::errors::SyncLibError;
/// Reconcile two concurrent edits (`left` and `right`) of the same `original` text.
///
/// Each edit is turned into an EditedText relative to `original`, the two are
/// merged, and the merged operations are applied to `original`.
///
/// # Errors
///
/// Returns the SyncLibError produced when the merged operations cannot be applied.
pub fn reconcile(original: &str, left: &str, right: &str) -> Result<String, SyncLibError> {
    EditedText::from_strings(original, left)
        .merge(EditedText::from_strings(original, right))
        .apply()
}
#[cfg(test)]
mod test {
use std::{env, fs, ops::Range, path::Path};
use pretty_assertions::assert_eq;
use ropey::Rope;
use test_case::test_matrix;
use super::*;
// Table of merge scenarios. Every case is reconciled with the two edits in both
// argument orders and must give the same expected text either way.
#[test]
fn test_merges() {
// Both replaced one token but different
test_merge_both_ways(
"original_1 original_2 original_3",
"original_1 edit_1 original_3",
"original_1 original_2 edit_2",
"original_1 edit_1 edit_2",
);
// Both replaced the same one token
test_merge_both_ways(
"original_1 original_2 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 edit_1 original_3",
);
// One deleted a large range, the other deleted subranges and inserted as well
test_merge_both_ways(
"original_1 original_2 original_3 original_4 original_5",
"original_1 original_5",
"original_1 edit_1 original_3 edit_2 original_5",
"original_1 edit_1 edit_2 original_5",
);
// One deleted a large range, the other inserted and deleted a partially overlapping range
test_merge_both_ways(
"original_1 original_2 original_3 original_4 original_5",
"original_1 original_5",
"original_1 edit_1 original_3 edit_2",
"original_1 edit_1 edit_2",
);
// Merge a replace and an append
test_merge_both_ways("a b ", "c d ", "a b c d ", "c d c d ");
// One side deletes a range that contains the other side's smaller deletes and inserts
test_merge_both_ways("a b c d e", "a e", "a c e", "a e");
test_merge_both_ways("a 0 1 2 b", "a b", "a E 1 F b", "a E F b");
test_merge_both_ways(
"a this one delete b",
"a b",
"a my one change b",
"a my change b",
);
test_merge_both_ways(
"this stays, this is one big delete, don't touch this",
"this stays, don't touch this",
"this stays, my one change, don't touch this",
"this stays, my change, don't touch this",
);
test_merge_both_ways("1 2 3 4 5 6", "1 6", "1 2 4 ", "1 ");
test_merge_both_ways(
"hello world",
"hi, world",
"hello my friend!",
"hi, my friend!",
);
// Disabled case, kept for reference.
// test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !");
// Both sides make the identical deletion
test_merge_both_ways(
"both delete the same word",
"both the same word",
"both the same word",
"both the same word",
);
// NOTE(review): the expected text mentions utf-8 but the whitespace-only inputs contain
// no non-ASCII characters here; they may have been lost in transit — confirm against the repository.
test_merge_both_ways(" ", "its utf-8!", " ", "its utf-8!");
test_merge_both_ways(
"both delete the same word but one a bit more",
"both the same word",
"both same word",
"both same wordword",
);
test_merge_both_ways(
"long text with one big delete and many small",
"long small",
"long with big and small",
"long small",
);
}
// Smoke test: reconciling slices of unrelated texts must not fail.
//
// Every combination of files and character ranges from the matrix is used as the
// (original, left, right) triple passed to `reconcile`.
#[test_matrix( [
    "pride_and_prejudice.txt",
    "romeo_and_juliet.txt",
    "room_with_a_view.txt",
    "kun_lu.txt",
], [
    "pride_and_prejudice.txt",
    "romeo_and_juliet.txt",
    "room_with_a_view.txt",
    "kun_lu.txt"
], [
    "pride_and_prejudice.txt",
    "romeo_and_juliet.txt",
    "room_with_a_view.txt",
    "kun_lu.txt"
], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])]
fn test_merge_files_without_panic(
    file_name_1: &str,
    file_name_2: &str,
    file_name_3: &str,
    range_1: Range<usize>,
    range_2: Range<usize>,
    range_3: Range<usize>,
) {
    let files = [file_name_1, file_name_2, file_name_3];
    let ranges = [range_1, range_2, range_3];

    let root = Path::new("test/resources/");

    let contents = files
        .iter()
        .zip(ranges.iter())
        .map(|(file, range)| {
            let path = root.join(file);
            fs::read_to_string(&path)
                .unwrap()
                .chars()
                .skip(range.start)
                // `take` expects a count, not an end position, so pass the range's length.
                .take(range.end - range.start)
                .collect::<String>()
        })
        .collect::<Vec<_>>();

    reconcile(&contents[0], &contents[1], &contents[2]).unwrap();
}
fn test_merge_both_ways(original: &str, edit_1: &str, edit_2: &str, expected: &str) {
assert_eq!(reconcile(original, edit_1, edit_2).unwrap(), expected);
assert_eq!(reconcile(original, edit_2, edit_1).unwrap(), expected);
}
}

View file

@ -6,9 +6,11 @@ use crate::errors::SyncLibError;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
/// Represents a change that can be applied to a text document.
/// Operation is tied to a ropey::Rope and is mainly expected to be
/// created by OperationSequence.
/// created by EditedText.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Operation {
@ -68,6 +70,15 @@ impl Operation {
}
/// Tries to apply the operation to the given ropey::Rope text, returning the modified text.
///
/// # Errors
///
/// Returns a SyncLibError::OperationApplicationError if the operation cannot be applied.
///
/// # Panics
///
/// When compiled in debug mode, panics if a delete operation is attempted on a range
/// of text that does not match the text to be deleted.
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
match self {
Operation::Insert { text, .. } => rope_text
@ -145,12 +156,9 @@ impl Operation {
}
/// Clones the operation while updating the index.
pub fn with_index(&self, index: usize) -> Self {
pub fn with_index(self, index: usize) -> Self {
match self {
Operation::Insert { text, .. } => Operation::Insert {
index,
text: text.clone(),
},
Operation::Insert { text, .. } => Operation::Insert { index, text },
Operation::Delete {
deleted_character_count,
@ -159,26 +167,159 @@ impl Operation {
..
} => Operation::Delete {
index,
deleted_character_count: *deleted_character_count,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text: deleted_text.clone(),
deleted_text,
},
}
}
/// Clones the operation while shifting the index by the given offset.
/// The offset can be negative but the resulting index must be non-negative.
pub fn with_shifted_index(&self, offset: i64) -> Result<Self, SyncLibError> {
///
/// # Panics
///
/// In debug mode, panics if the resulting index is negative.
pub fn with_shifted_index(self, offset: i64) -> Self {
let index = self.start_index() as i64 + offset;
let non_negative_index = index.try_into().map_err(|_| {
SyncLibError::NegativeOperationIndexError(format!(
"Index {} is negative but operations must have a non-negative index",
index
))
})?;
debug_assert!(index >= 0, "Shifted index must be non-negative");
Ok(self.with_index(non_negative_index))
self.with_index(index as usize)
}
/// Merges the operation with the given context, producing a new operation and updating the context.
/// This implements a complex FSM that handles the merging of operations in a way that is consistent with the text.
/// The contexts are updated in-place.
///
/// `affecting_context` supplies the shift and the pending delete this operation is
/// adjusted against; `produced_context` records the shift and delete this operation
/// contributes.
///
/// Returns the adjusted operation. In the delete-within-delete case the result is
/// whatever `Operation::create_delete` yields, which may be `None`
/// (presumably when nothing is left to delete — confirm against `create_delete`).
pub fn merge_operations_with_context(
self,
affecting_context: &mut MergeContext,
produced_context: &mut MergeContext,
) -> Option<Operation> {
// Drop the affecting side's pending delete if this operation starts past it.
affecting_context.consume_delete_if_behind_operation(&self);
// Move the operation by the shift accumulated in the affecting context.
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_delete.clone()) {
// Insert with no pending delete: keep it and account for its length.
(operation @ Operation::Insert { .. }, None) => {
produced_context.shift += operation.len() as i64;
Some(operation)
}
// Delete with no pending delete: keep it and remember it as the produced side's last delete.
(operation @ Operation::Delete { .. }, None) => {
produced_context.replace_delete(Some(operation.clone()));
Some(operation)
}
// Insert while a delete is pending: the insert is moved to the start of that delete.
(operation @ Operation::Insert { .. }, Some(last_delete)) => {
produced_context.shift += operation.len() as i64;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it"
);
// How far into the pending delete the insert originally started.
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
// The pending delete is re-created right after the moved insert, shortened by `difference`.
affecting_context.last_delete = Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
);
affecting_context.shift -= difference;
Some(moved_operation)
}
// Delete while a delete is pending: only the part reaching past the pending delete is kept.
(operation @ Operation::Delete { .. }, Some(last_delete)) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
// Length by which this delete extends beyond the pending delete (clamped at 0).
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
);
affecting_context.shift -= difference;
// Length by which the pending delete extends beyond this delete (clamped at 0).
affecting_context.last_delete = Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
);
produced_context.replace_delete(updated_delete.clone());
updated_delete
}
}
}
/// Combine this operation with `other`, the operation that directly follows it.
///
/// For inserts, `other` must start right where this insert's text ends; the two
/// texts are concatenated. For deletes, both must have the same index; the deleted
/// character counts are added up. The result keeps this operation's index.
///
/// # Panics
///
/// Panics if the two operations are of different types. In debug builds, also
/// panics if the operations are not consecutive.
pub fn extend(self, other: &Self) -> Self {
    match (self, other) {
        (
            Operation::Insert { index, text },
            Operation::Insert {
                text: appended_text,
                ..
            },
        ) => {
            let expected_start = index + text.chars().count();
            debug_assert!(
                expected_start == other.start_index(),
                "Cannot merge non-consequtive inserts with index {} and {}",
                expected_start,
                other.start_index()
            );

            Operation::Insert {
                index,
                text: text + appended_text,
            }
        }
        (
            Operation::Delete {
                index,
                deleted_character_count,
                #[cfg(debug_assertions)]
                deleted_text,
            },
            Operation::Delete {
                index: other_index,
                deleted_character_count: other_deleted_character_count,
                #[cfg(debug_assertions)]
                    deleted_text: other_deleted_text,
            },
        ) => {
            debug_assert!(
                index == *other_index,
                "Cannot merge non-consequtive deletes",
            );

            let combined_count = deleted_character_count + other_deleted_character_count;

            Operation::Delete {
                index,
                deleted_character_count: combined_count,
                // The combined text is only known when both sides carry their text.
                #[cfg(debug_assertions)]
                deleted_text: deleted_text.and_then(|own_text| {
                    other_deleted_text
                        .as_ref()
                        .map(|following_text| own_text + following_text)
                }),
            }
        }
        (this, other) => panic!(
            "Cannot merge operations of different type: {:?} and {:?}",
            &this, &other
        ),
    }
}
}
@ -195,7 +336,7 @@ impl Display for Operation {
#[cfg(debug_assertions)]
deleted_text,
} => {
if cfg!(debug_assertions) {
if cfg!(debug_assertions) && deleted_text.is_some() {
write!(
f,
"<delete '{}' from index {}>",
@ -220,6 +361,7 @@ mod tests {
use pretty_assertions::assert_eq;
#[test]
#[should_panic]
fn test_shifting_error() {
insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string())
.unwrap()

View file

@ -0,0 +1,61 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -0,0 +1,45 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
assertion_line: 242
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 10,
deleted_text: Some(
"you? Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you doing? Albert",
},
},
],
}

View file

@ -1,60 +1,61 @@
---
source: reconcile/src/operations/operation_sequence.rs
expression: result
source: reconcile/src/operations/edited_text.rs
expression: operations
snapshot_kind: text
---
OperationSequence {
EditedText {
text: "hello world! How are you? Adam",
operations: [
(
0,
Insert {
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
),
(
0,
Delete {
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
),
(
21,
Delete {
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
),
(
26,
Delete {
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
),
(
31,
Insert {
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
),
(
31,
Insert {
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
),
},
],
}

View file

@ -3,58 +3,58 @@ source: reconcile/src/operations/operation_sequence.rs
expression: operations
snapshot_kind: text
---
OperationSequence {
EditedText {
operations: [
(
0,
Insert {
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
),
(
0,
Delete {
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
),
(
21,
Delete {
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
),
(
26,
Delete {
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
),
(
31,
Insert {
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
),
(
31,
Insert {
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
),
},
],
}

View file

@ -1,25 +0,0 @@
mod operation;
mod operation_sequence;
pub use operation::Operation;
pub use operation_sequence::OperationSequence;
#[cfg(test)]
mod test {
// Placeholder: the whole body is commented out, so this test currently asserts nothing.
#[test]
fn test_merge() {
// let mut original = Rope::from_str("hello world!");
// let edit_1 = "hi, world";
// let edit_2 = "hello, my friend!";
// let mut operations_1 = calculate_operations(&original.to_string(), edit_1, 1.0).unwrap();
// let mut operations_2 = calculate_operations(&original.to_string(), edit_2, 1.0).unwrap();
// let result =
// merge_and_apply_operations(&mut original, &mut operations_1, &mut operations_2)
// .unwrap();
// assert_eq!(result, "hey, my friend!");
}
}

View file

@ -1,540 +0,0 @@
use std::{cmp::Ordering, result, vec};
use super::Operation;
use crate::diffs::myers::diff;
use crate::diffs::raw_operation::RawOperation;
use crate::errors::SyncLibError;
use crate::tokenizer::token::Token;
use itertools::Itertools;
use ropey::Rope;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Bookkeeping kept for one side while two operation sequences are merged.
#[derive(Debug, Clone, Default)]
struct MergeContext {
/// The most recently recorded delete, as long as it has not been replaced or picked up.
last_delete: Option<Operation>,
/// Running signed offset: increased by the length of merged inserts and decreased
/// by the length of deletes that are replaced or picked up.
shift: i64,
}
/// A sequence of operations that can be applied to a text document.
/// OperationSequence supports merging two sequences of operations using the
/// principle of Operational Transformation.
///
/// It's mainly created through the from_strings method, then merged with another
/// OperationSequence derived from the same original text and then applied to the original text
/// to get the reconciled text of concurrent edits.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct OperationSequence {
/// The operations, each paired with a `usize` ordering key; `cook_operations` sets
/// that key to the operation's index in the original text and sorts by it.
operations: Vec<(usize, Operation)>,
}
impl OperationSequence {
/// Wrap the given `(old index, operation)` pairs in an OperationSequence.
///
/// The operations should be listed in the order they are applied in and must not overlap.
///
/// # Panics
///
/// In debug builds, panics if a delete followed by an insert overlaps by old index,
/// or if an operation starts before the one preceding it.
pub fn new(operations: Vec<(usize, Operation)>) -> Self {
    for pair in operations.windows(2) {
        let (i_prev, previous) = &pair[0];
        let (i_next, next) = &pair[1];

        debug_assert!(
            i_prev == i_next
                || i_prev + previous.len() <= *i_next
                || !(matches!(previous, Operation::Delete { .. })
                    && matches!(next, Operation::Insert { .. })),
            "{} and {} overlap with old index {i_prev} and {i_next}",
            previous,
            next
        );
        debug_assert!(
            previous.start_index() <= next.start_index(),
            "{} must not come before {} yet it does",
            previous,
            next
        );
    }

    Self { operations }
}
/// Build an OperationSequence describing how `original` was turned into `updated`.
///
/// Both strings are tokenized, the token sequences are diffed, and the raw diff is
/// converted into indexed operations. Applying the returned value to the original
/// text results in the updated text.
pub fn from_strings(original: &str, updated: &str) -> Self {
    let tokens_before = Token::tokenize(original);
    let tokens_after = Token::tokenize(updated);

    let raw_operations: Vec<RawOperation> = diff(&tokens_before, &tokens_after);
    let cooked_operations = Self::cook_operations(raw_operations);

    Self::new(cooked_operations)
}
// Convert raw diff operations into `(old index, operation)` pairs sorted by old index,
// tracking the running position in both the new and the old text.
fn cook_operations(raw_operations: Vec<RawOperation>) -> Vec<(usize, Operation)> {
    let mut new_index = 0;
    let mut old_index = 0;

    raw_operations
        .into_iter()
        .filter_map(|raw_operation| {
            let length = raw_operation.original_text_length();

            match raw_operation {
                // Unchanged text yields no operation; it only advances both cursors.
                RawOperation::Equal(..) => {
                    new_index += length;
                    old_index += length;
                    None
                }
                RawOperation::Insert(..) => {
                    let insert =
                        Operation::create_insert(new_index, raw_operation.get_original_text());
                    new_index += length;
                    insert.map(|op| (old_index, op))
                }
                // The delete is paired with the old index before that cursor advances.
                RawOperation::Delete(..) => {
                    let delete = Operation::create_delete_with_text(
                        new_index,
                        raw_operation.get_original_text(),
                    );
                    let indexed_delete = delete.map(|op| (old_index, op));
                    old_index += length;
                    indexed_delete
                }
            }
        })
        .sorted_by_key(|(order, _)| *order)
        .collect()
}
/// Merge this sequence with another one derived from the same original text.
///
/// Both operation lists are walked in parallel. In every step the operation with the
/// smaller ordering key is taken (the left one on ties), shifted by the opposite
/// side's context and merged against it. Every operation in the result is stored
/// with ordering key 0.
///
/// # Errors
///
/// Returns a SyncLibError if shifting an operation's index or merging it with the
/// contexts fails.
pub fn merge(&self, other: &Self) -> Result<Self, SyncLibError> {
    let mut merged_operations: Vec<Operation> =
        Vec::with_capacity(self.operations.len() + other.operations.len());

    let mut left_merge_context = MergeContext::default();
    let mut right_merge_context = MergeContext::default();

    let mut left_index: usize = 0;
    let mut right_index: usize = 0;

    loop {
        let left_op = self.operations.get(left_index);
        let right_op = other.operations.get(right_index);

        // `None` sorts before `Some`, so an exhausted side never wins the comparison;
        // those cases are handled by the explicit `None` patterns below.
        let order = left_op
            .map(|(order, _)| order)
            .cmp(&right_op.map(|(order, _)| order));

        let left_op = left_op.map(|(_, op)| op);
        let right_op = right_op.map(|(_, op)| op);

        match (left_op, right_op, order) {
            (Some(left_op), None, _)
            | (Some(left_op), Some(_), std::cmp::Ordering::Less | std::cmp::Ordering::Equal) => {
                Self::pick_up_dangling_delete_from_affecting_context(
                    left_op.start_index(),
                    &mut right_merge_context,
                );

                if let Some(op) = Self::merge_operations_with_context(
                    left_op.with_shifted_index(right_merge_context.shift)?,
                    &mut right_merge_context,
                    &mut left_merge_context,
                )? {
                    if let Some(last) = merged_operations.last() {
                        debug_assert!(op.start_index() >= last.start_index());
                    }
                    merged_operations.push(op);
                }

                left_index += 1;
            }
            (None, Some(right_op), _)
            | (Some(_), Some(right_op), std::cmp::Ordering::Greater) => {
                Self::pick_up_dangling_delete_from_affecting_context(
                    right_op.start_index(),
                    &mut left_merge_context,
                );

                if let Some(op) = Self::merge_operations_with_context(
                    right_op.with_shifted_index(left_merge_context.shift)?,
                    &mut left_merge_context,
                    &mut right_merge_context,
                )? {
                    if let Some(last) = merged_operations.last() {
                        debug_assert!(op.start_index() >= last.start_index());
                    }
                    merged_operations.push(op);
                }

                right_index += 1;
            }
            (None, None, _) => {
                break;
            }
        };
    }

    Ok(Self::new(
        merged_operations.into_iter().map(|op| (0, op)).collect(),
    ))
}
/// Apply every operation, in order, to `rope_text` and hand the same rope back.
///
/// # Errors
///
/// Propagates the first SyncLibError returned by an operation that cannot be applied;
/// operations applied before the failure stay applied.
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
    self.operations
        .iter()
        .try_for_each(|(_, operation)| operation.apply(rope_text).map(|_| ()))?;

    Ok(rope_text)
}
/// Merge an already shifted operation against the two merge contexts, updating both in place.
///
/// `affecting_context` supplies the pending delete the operation is adjusted against;
/// `produced_context` records the shift and delete the operation contributes.
/// Every path visible here returns `Ok`; the inner `Option` is `None` only when
/// `Operation::create_delete` yields `None` in the delete-within-delete case
/// (presumably when nothing is left to delete — confirm against `create_delete`).
fn merge_operations_with_context(
aligned_operation: Operation,
affecting_context: &mut MergeContext,
produced_context: &mut MergeContext,
) -> Result<Option<Operation>, SyncLibError> {
Ok(
match (aligned_operation, affecting_context.last_delete.clone()) {
// Insert with no pending delete: keep it and account for its length.
(operation @ Operation::Insert { .. }, None) => {
produced_context.shift += operation.len() as i64;
Some(operation)
}
// Delete with no pending delete: keep it and remember it as the produced side's last delete.
(operation @ Operation::Delete { .. }, None) => {
Self::replace_delete_in_produced_context(
produced_context,
Some(operation.clone()),
);
Some(operation)
}
// Insert while a delete is pending: the insert is moved to the start of that delete.
(operation @ Operation::Insert { .. }, Some(last_delete)) => {
produced_context.shift += operation.len() as i64;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it"
);
// How far into the pending delete the insert originally started.
let difference =
operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
// The pending delete is re-created right after the moved insert, shortened by `difference`.
affecting_context.last_delete = Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
);
affecting_context.shift -= difference;
Some(moved_operation)
}
// Delete while a delete is pending: only the part reaching past the pending delete is kept.
(operation @ Operation::Delete { .. }, Some(last_delete)) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is not contained in it"
);
let difference =
operation.start_index() as i64 - last_delete.start_index() as i64;
// Length by which this delete extends beyond the pending delete (clamped at 0).
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64)
as usize,
);
affecting_context.shift -= difference;
// Length by which the pending delete extends beyond this delete (clamped at 0).
affecting_context.last_delete = Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64)
as usize,
);
Self::replace_delete_in_produced_context(
produced_context,
updated_delete.clone(),
);
updated_delete
}
},
)
}
// Store `delete` as the context's last delete. If a delete was already stored,
// its length is subtracted from the context's shift before it is discarded.
fn replace_delete_in_produced_context(
    produced_context: &mut MergeContext,
    delete: Option<Operation>,
) {
    let replaced_length = produced_context
        .last_delete
        .take()
        .map_or(0, |replaced_delete| replaced_delete.len() as i64);

    produced_context.shift -= replaced_length;
    produced_context.last_delete = delete;
}
// Clear the context's stored delete once `start_index`, moved by the context's
// shift, lies past the end of that delete. The delete's length is subtracted from
// the shift when it is cleared; otherwise nothing changes.
fn pick_up_dangling_delete_from_affecting_context(
    start_index: usize,
    affecting_context: &mut MergeContext,
) {
    if let Some(last_delete) = affecting_context.last_delete.as_ref() {
        let shifted_start = start_index as i64 + affecting_context.shift;

        if shifted_start > last_delete.end_index() as i64 {
            affecting_context.shift -= last_delete.len() as i64;
            affecting_context.last_delete = None;
        }
    }
}
}
#[cfg(test)]
mod tests {
use std::{fs, path::Path};
use itertools::Itertools;
use pretty_assertions::assert_eq;
use super::*;
// Diffing two strings must produce the operations stored in the snapshot, and
// applying them to the original must reproduce the updated string.
#[test]
fn test_calculate_operations() {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = OperationSequence::from_strings(left, right);
insta::assert_debug_snapshot!(operations);
// Shadows the &str with a rope of the same text, which `apply` edits in place.
let mut left = Rope::from_str(left);
let new_right = operations.apply(&mut left).unwrap();
assert_eq!(new_right.to_string(), right);
}
/// Diffing a string against itself produces no operations, and applying the
/// empty sequence leaves the text unchanged.
#[test]
fn test_calculate_operations_with_no_diff() {
    let text = "hello world!";

    let operations = OperationSequence::from_strings(text, text);
    assert_eq!(operations.operations.len(), 0);

    let mut rope = Rope::from_str(text);
    let applied = operations.apply(&mut rope).unwrap();
    assert_eq!(applied.to_string(), text);
}
/// Table of three-way merge cases, each checked with the two edits in both orders.
///
/// Every entry is `(original, edit_1, edit_2, expected)`.
#[test]
fn test_merges() {
    let cases = [
        // Both sides replaced one token, but different ones.
        (
            "original_1 original_2 original_3",
            "original_1 edit_1 original_3",
            "original_1 original_2 edit_2",
            "original_1 edit_1 edit_2",
        ),
        // Both sides replaced the same token.
        (
            "original_1 original_2 original_3",
            "original_1 edit_1 original_3",
            "original_1 edit_1 original_3",
            "original_1 edit_1 edit_1 original_3",
        ),
        // One side deleted a large range, the other deleted subranges and inserted as well.
        (
            "original_1 original_2 original_3 original_4 original_5",
            "original_1 original_5",
            "original_1 edit_1 original_3 edit_2 original_5",
            "original_1 edit_1 edit_2 original_5",
        ),
        // One side deleted a large range, the other inserted and deleted a partially
        // overlapping range.
        (
            "original_1 original_2 original_3 original_4 original_5",
            "original_1 original_5",
            "original_1 edit_1 original_3 edit_2",
            "original_1 edit_1 edit_2",
        ),
        // A replace merged with an append.
        ("a b ", "c d ", "a b c d ", "c d c d "),
        ("a b c d e", "a e", "a c e", "a e"),
        ("a 0 1 2 b", "a b", "a E 1 F b", "a E F b"),
        (
            "a this one delete b",
            "a b",
            "a my one change b",
            "a my change b",
        ),
        (
            "this stays, this is one big delete, don't touch this",
            "this stays, don't touch this",
            "this stays, my one change, don't touch this",
            "this stays, my change, don't touch this",
        ),
        ("1 2 3 4 5 6", "1 6", "1 2 4 ", "1 "),
        (
            "hello world",
            "hi, world",
            "hello my friend!",
            "hi, my friend!",
        ),
        // NOTE(review): this case was already disabled; the reason is not recorded here.
        // ("hello world", "world !", "hi hello world", "hi world !"),
        (
            "both delete the same word",
            "both the same word",
            "both the same word",
            "both the same word",
        ),
        (" ", "its utf-8!", " ", "its utf-8!"),
        (
            "both delete the same word but one a bit more",
            "both the same word",
            "both same word",
            "both same wordword",
        ),
        (
            "long text with one big delete and many small",
            "long small",
            "long with big and small",
            "long small",
        ),
    ];

    for &(original, edit_1, edit_2, expected) in cases.iter() {
        test_merge_both_ways(original, edit_1, edit_2, expected);
    }
}
/// Smoke test: merging prefixes of long real-world texts must not panic.
///
/// Every ordering of the three sample texts is fed to `test_merge` as
/// (original, edit_1, edit_2). The merged output itself is not inspected.
#[test]
fn test_merge_files_without_panic() {
    /// Maximum number of bytes taken from the start of each sample file.
    const MAX_BYTES: usize = 15000;

    /// Returns the longest prefix of `text` that is at most `max_bytes` bytes
    /// long and ends on a UTF-8 character boundary.
    ///
    /// A plain `text[..max_bytes]` panics when the text is shorter than
    /// `max_bytes` or when that byte offset falls inside a multi-byte character.
    fn prefix_on_char_boundary(text: &str, max_bytes: usize) -> &str {
        let mut end = text.len().min(max_bytes);
        // Index 0 is always a boundary, so this loop terminates.
        while !text.is_char_boundary(end) {
            end -= 1;
        }
        &text[..end]
    }

    let files = vec![
        "pride_and_prejudice.txt",
        "romeo_and_juliet.txt",
        "room_with_a_view.txt",
    ];
    let root = Path::new("test/resources/");

    let contents = files
        .into_iter()
        .map(|name| fs::read_to_string(root.join(name)).unwrap())
        .map(|text| prefix_on_char_boundary(&text, MAX_BYTES).to_string())
        .collect::<Vec<_>>();

    contents
        .iter()
        .permutations(3)
        .unique()
        .for_each(|permutations| {
            test_merge(permutations[0], permutations[1], permutations[2]);
        });
}
fn test_merge_both_ways(original: &str, edit_1: &str, edit_2: &str, expected: &str) {
assert_eq!(test_merge(original, edit_1, edit_2), expected);
assert_eq!(test_merge(original, edit_2, edit_1), expected);
}
/// Merges two concurrent edits of `original` and returns the merged text.
///
/// Before merging, each edit's operations are checked on their own: applied to
/// a copy of the original text they must reproduce that edit exactly.
fn test_merge(original: &str, edit_1: &str, edit_2: &str) -> String {
    let mut base = Rope::from_str(original);
    // Both diffs are computed against the rope's own rendering of the text.
    let base_text = base.to_string();

    let operations_1 = OperationSequence::from_strings(&base_text, edit_1);
    let operations_2 = OperationSequence::from_strings(&base_text, edit_2);

    for (operations, edit) in [(&operations_1, edit_1), (&operations_2, edit_2)] {
        let reproduced = operations.apply(&mut base.clone()).unwrap().to_string();
        assert_eq!(reproduced, edit);
    }

    let merged = operations_1.merge(&operations_2).unwrap();
    let result = merged.apply(&mut base).unwrap();
    result.to_string()
}
}

View file

@ -1,10 +0,0 @@
---
source: reconcile/src/operations/operation.rs
expression: "Operation::create_insert(1, \"hi\".to_string()).unwrap().with_shifted_index(-2)"
snapshot_kind: text
---
Err(
NegativeOperationIndexError(
"Index -1 is negative but operations must have a non-negative index",
),
)

View file

@ -1,7 +1,7 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::operations::Operation;
use crate::operation_transformation::Operation;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]