Merge crates

This commit is contained in:
Andras Schmelczer 2025-06-15 11:30:07 +01:00
parent 82e77eec89
commit bcbac03228
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
60 changed files with 73 additions and 248 deletions

View file

@ -0,0 +1,57 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Position of an identifiable cursor in a text document, expressed as a
/// (UTF-8) character index.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
    /// Identifier that tells this cursor apart from other cursors.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

impl CursorPosition {
    /// Returns a copy of this cursor that keeps its `id` but points at
    /// `index`. The receiver is left untouched.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        Self {
            char_index: index,
            ..self.clone()
        }
    }
}
/// A piece of text together with the cursors placed in it.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
    /// The text itself, either borrowed from the caller or owned.
    pub text: Cow<'a, str>,
    /// Cursors associated with `text`.
    pub cursors: Vec<CursorPosition>,
}

impl<'a> TextWithCursors<'a> {
    /// Wraps a borrowed string slice and its cursors without copying the text.
    #[must_use]
    pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Borrowed(text);
        Self { text, cursors }
    }

    /// Wraps an owned string and its cursors, taking ownership of the text.
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Owned(text);
        Self { text, cursors }
    }
}

impl<'a> From<&'a str> for TextWithCursors<'a> {
    /// Converts a plain string slice into a `TextWithCursors` that has no
    /// cursors.
    fn from(text: &'a str) -> Self { Self::new(text, Vec::new()) }
}

View file

@ -0,0 +1,277 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
use crate::{
diffs::{myers::diff, raw_operation::RawOperation},
operation_transformation::{
merge_context::MergeContext,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
},
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
utils::{side::Side, string_builder::StringBuilder},
};
/// A text document and a sequence of operations that can be applied to the text
/// document. `EditedText` supports merging two sequences of operations using
/// the principles of Operational Transformation.
///
/// It is mainly created through the `from_strings` method, then merged with
/// another `EditedText` derived from the same original text, and finally
/// applied to the original text to get the reconciled text of the concurrent
/// edits.
///
/// In addition to the text and the operations, it also keeps track of cursor
/// positions. The cursor positions are recomputed when two `EditedText`s are
/// merged, so that they can be used to restore the cursors in the merged text.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
// The original text that the operations are applied to.
text: &'a str,
// Operations in the order in which they are meant to be applied.
operations: Vec<OrderedOperation<T>>,
// Cursors, kept sorted by `char_index` (sorted in `new`).
pub(crate) cursors: Vec<CursorPosition>,
}
impl<'a> EditedText<'a, String> {
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The default
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
}
}
impl<'a, T> EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The tokenizer
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
Self::new(
original,
cook_operations(elongate_operations(diff)).collect(),
updated.cursors,
)
}
/// Assembles an `EditedText` from its parts.
///
/// `operations` must already be in the order in which they are meant to be
/// applied and must not overlap; in debug builds the ordering of start
/// indices is asserted for every adjacent pair. The cursors are sorted by
/// their character index before being stored.
fn new(
    text: &'a str,
    operations: Vec<OrderedOperation<T>>,
    mut cursors: Vec<CursorPosition>,
) -> Self {
    for pair in operations.windows(2) {
        let (previous, next) = (&pair[0], &pair[1]);
        debug_assert!(
            previous.operation.start_index() <= next.operation.start_index(),
            "{} must not come before {} yet it does",
            previous.operation,
            next.operation
        );
    }

    cursors.sort_by_key(|cursor| cursor.char_index);

    Self {
        text,
        operations,
        cursors,
    }
}
/// Merges two `EditedText`s that were derived from the same original text
/// into one whose operations contain the edits of both sides.
///
/// The two operation sequences are walked in lockstep, always taking the
/// smaller pending operation first (as defined by `OrderedOperation`'s
/// ordering; on a tie the right side goes first). Each operation is
/// transformed against the merge context of the *other* side and recorded
/// in the context of its own side. Cursors are carried along and re-indexed
/// based on the transformed operation they belong to; cursors that are left
/// over after the last operation are placed at the end index of the last
/// merged insert/equal operation (or 0 if there is none).
///
/// In debug builds, asserts that both sides refer to the same text.
#[must_use]
pub fn merge(self, other: Self) -> Self {
    debug_assert_eq!(
        self.text, other.text,
        "`EditedText`-s must be derived from the same text to be mergable"
    );

    let mut left_merge_context = MergeContext::default();
    let mut right_merge_context = MergeContext::default();

    let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
    let mut left_cursors = self.cursors.into_iter().peekable();
    let mut right_cursors = other.cursors.into_iter().peekable();

    let mut merged_operations: Vec<OrderedOperation<T>> =
        Vec::with_capacity(self.operations.len() + other.operations.len());
    let mut left_operations = self.operations.into_iter().peekable();
    let mut right_operations = other.operations.into_iter().peekable();

    loop {
        // Only peek to decide which side goes next; this avoids cloning the
        // pending operations (and their token vectors) on every iteration.
        let side = match (left_operations.peek(), right_operations.peek()) {
            (Some(left_op), Some(right_op)) => {
                if left_op < right_op {
                    Side::Left
                } else {
                    Side::Right
                }
            }
            (Some(_), None) => Side::Left,
            (None, Some(_)) => Side::Right,
            (None, None) => break,
        };

        let next = match side {
            Side::Left => left_operations.next(),
            Side::Right => right_operations.next(),
        };
        // The chosen side was just peeked as `Some`, so this never breaks.
        let Some(OrderedOperation { operation, order }) = next else {
            break;
        };

        let original_start = operation.start_index() as i64;
        let original_end = operation.end_index();
        let original_length = operation.len() as i64;

        // An operation is affected by what the other side has done so far and
        // is recorded in the context of its own side.
        let (affecting_context, produced_context, cursors) = match side {
            Side::Left => (
                &mut right_merge_context,
                &mut left_merge_context,
                &mut left_cursors,
            ),
            Side::Right => (
                &mut left_merge_context,
                &mut right_merge_context,
                &mut right_cursors,
            ),
        };

        let result = operation.merge_operations_with_context(affecting_context, produced_context);

        if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result {
            // How far the operation moved and how much it grew or shrank.
            let shift =
                op.start_index() as i64 - original_start + op.len() as i64 - original_length;

            // Move every cursor of this side that sits within (or right
            // after) the original operation, never placing it before the
            // start of the transformed operation.
            while let Some(cursor) =
                cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
            {
                merged_cursors.push(cursor.with_index(
                    (op.start_index() as i64).max(cursor.char_index as i64 + shift) as usize,
                ));
            }
        }

        merged_operations.extend(result.map(|operation| OrderedOperation { order, operation }));
    }

    let last_index = merged_operations
        .iter()
        .rfind(|ordered| {
            matches!(
                ordered.operation,
                Operation::Insert { .. } | Operation::Equal { .. }
            )
        })
        .map_or(0, |ordered| ordered.operation.end_index());

    merged_cursors.extend(
        left_cursors
            .chain(right_cursors)
            .map(|cursor| cursor.with_index(last_index)),
    );

    Self::new(self.text, merged_operations, merged_cursors)
}
/// Apply the operations to the text and return the resulting text.
#[must_use]
pub fn apply(&self) -> String {
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for OrderedOperation { operation, .. } in &self.operations {
builder = operation.apply(builder);
}
builder.build()
}
}
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;
    use pretty_assertions::assert_eq;

    use super::*;

    /// Diffing two different texts must produce operations that turn the
    /// original into the updated text.
    #[test]
    fn test_calculate_operations() {
        let left = "hello world! How are you? Adam";
        let right = "Hello, my friend! How are you doing? Albert";

        let operations = EditedText::from_strings(left, right.into());
        assert_debug_snapshot!(operations);

        let new_right = operations.apply();
        assert_eq!(new_right, right);
    }

    /// Diffing a text with itself must leave the text unchanged when applied.
    #[test]
    fn test_calculate_operations_with_no_diff() {
        let text = "hello world!";

        let operations = EditedText::from_strings(text, text.into());
        assert_debug_snapshot!(operations);

        let new_right = operations.apply();
        assert_eq!(new_right, text);
    }

    /// Two concurrent edits of the same original must both survive a merge.
    #[test]
    fn test_calculate_operations_with_insert() {
        let original = "hello world! ...";
        let left = "Hello world! I'm Andras.";
        let right = "Hello world! How are you?";
        let expected = "Hello world! How are you? I'm Andras.";

        let operations_1 = EditedText::from_strings(original, left.into());
        let operations_2 = EditedText::from_strings(original, right.into());

        let operations = operations_1.merge(operations_2);
        assert_eq!(operations.apply(), expected);
    }
}

View file

@ -0,0 +1,73 @@
use core::fmt::Debug;
use crate::operation_transformation::Operation;
/// Bookkeeping for one side of a merge: the last operation that side produced
/// and the accumulated index shift.
#[derive(Clone, Debug)]
pub struct MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
// The most recently recorded operation, if it has not been consumed yet.
last_operation: Option<Operation<T>>,
// Running offset; `merge_operations_with_context` adds the affecting
// context's `shift` to the index of each operation it transforms.
pub shift: i64,
}
impl<T> Default for MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn default() -> Self {
MergeContext {
last_operation: None,
shift: 0,
}
}
}
impl<T> MergeContext<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Returns the most recently recorded operation, if any.
    pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }

    /// Overwrites the recorded operation without touching `shift`.
    pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
        self.last_operation = operation;
    }

    /// Records `operation` as the last operation. If the operation being
    /// replaced was a delete, its deleted character count is subtracted from
    /// `shift` first.
    pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
        let previous = std::mem::replace(&mut self.last_operation, operation);

        if let Some(Operation::Delete {
            deleted_character_count,
            ..
        }) = previous
        {
            self.shift -= deleted_character_count as i64;
        }
    }

    /// Drops the recorded operation when it lies behind `threshold_index`
    /// (after shifting). A dropped delete also subtracts its deleted character
    /// count from `shift`; a dropped insert leaves `shift` unchanged. Recorded
    /// equal operations are never dropped here.
    pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
        let Some(last_operation) = self.last_operation.as_ref() else {
            return;
        };

        // `Some(n)` means the recorded operation is stale and `n` has to be
        // subtracted from the shift.
        let shift_correction = match last_operation {
            Operation::Delete {
                deleted_character_count,
                ..
            } => (threshold_index + self.shift > last_operation.end_index() as i64)
                .then_some(*deleted_character_count as i64),
            Operation::Insert { .. } => (threshold_index + self.shift
                - last_operation.len() as i64
                > last_operation.end_index() as i64)
                .then_some(0),
            Operation::Equal { .. } => None,
        };

        if let Some(correction) = shift_correction {
            self.shift -= correction;
            self.last_operation = None;
        }
    }
}

View file

@ -0,0 +1,513 @@
use core::fmt::{Debug, Display};
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::{
Token,
utils::{
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
string_builder::StringBuilder,
},
};
/// Represents a change that can be applied on a `StringBuilder`.
///
/// Every variant carries the `index` of the first character it affects. The
/// `text` / `deleted_text` fields of `Equal` and `Delete` exist only in builds
/// with debug assertions enabled, where `apply` uses them for sanity checks.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq)]
pub enum Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
// `length` characters starting at `index` are left unchanged.
Equal {
index: usize,
length: usize,
// The text expected in the range, if known (debug builds only).
#[cfg(debug_assertions)]
text: Option<String>,
},
// The tokens in `text` are inserted at `index`.
Insert {
index: usize,
text: Vec<Token<T>>,
},
// `deleted_character_count` characters are removed starting at `index`.
Delete {
index: usize,
deleted_character_count: usize,
// The text expected to be deleted, if known (debug builds only).
#[cfg(debug_assertions)]
deleted_text: Option<String>,
},
}
impl<T> Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Creates an equal operation marking `length` characters starting at `index`
/// as unchanged. The expected text is left unknown.
///
/// Returns `None` if `length` is zero, because such an operation would be a
/// no-op.
pub fn create_equal(index: usize, length: usize) -> Option<Self> {
    match length {
        0 => None,
        _ => Some(Operation::Equal {
            index,
            length,
            #[cfg(debug_assertions)]
            text: None,
        }),
    }
}
/// Creates an equal operation starting at `index` whose length is the number
/// of characters in `text`. In builds with debug assertions the text itself is
/// stored so that `apply` can verify it.
///
/// Returns `None` if `text` is empty.
pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
    let length = text.chars().count();

    (length > 0).then(|| Operation::Equal {
        index,
        length,
        #[cfg(debug_assertions)]
        text: Some(text),
    })
}
/// Creates an insert operation that inserts the tokens of `text` at `index`.
///
/// Returns `None` if `text` contains no tokens, because such an operation
/// would be a no-op.
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
    (!text.is_empty()).then(|| Operation::Insert { index, text })
}
/// Creates a delete operation removing `deleted_character_count` characters
/// starting at `index`. The text expected to be deleted is left unknown.
///
/// Returns `None` if `deleted_character_count` is zero, because such an
/// operation would be a no-op.
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
    match deleted_character_count {
        0 => None,
        _ => Some(Operation::Delete {
            index,
            deleted_character_count,
            #[cfg(debug_assertions)]
            deleted_text: None,
        }),
    }
}
/// Creates a delete operation starting at `index` that removes as many
/// characters as `text` contains. In builds with debug assertions the text
/// itself is stored so that `apply` can verify it.
///
/// Returns `None` if `text` is empty.
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
    let deleted_character_count = text.chars().count();

    (deleted_character_count > 0).then(|| Operation::Delete {
        index,
        deleted_character_count,
        #[cfg(debug_assertions)]
        deleted_text: Some(text),
    })
}
/// Applies the operation to `builder` and hands the builder back.
///
/// An equal operation leaves the builder untouched, an insert adds the
/// original text of its tokens at the operation's start index, and a delete
/// removes the operation's range.
///
/// # Panics
///
/// In builds with debug assertions, panics if an equal or delete operation
/// carries its expected text and the text found in the builder at the
/// operation's range differs from it.
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
    match self {
        Operation::Equal {
            #[cfg(debug_assertions)]
            text,
            ..
        } => {
            #[cfg(debug_assertions)]
            debug_assert!(
                text.as_ref()
                    .is_none_or(|expected| builder.get_slice(self.range()) == *expected),
                "Text which is supposed to be equal does not match the text in the range"
            );
        }
        Operation::Insert { text, .. } => {
            let inserted = text.iter().map(Token::original).collect::<String>();
            builder.insert(self.start_index(), &inserted);
        }
        Operation::Delete {
            #[cfg(debug_assertions)]
            deleted_text,
            ..
        } => {
            #[cfg(debug_assertions)]
            debug_assert!(
                deleted_text
                    .as_ref()
                    .is_none_or(|expected| builder.get_slice(self.range()) == *expected),
                "Text to delete does not match the text in the range"
            );
            builder.delete(self.range());
        }
    }

    builder
}
/// Returns the index of the first character affected by the operation,
/// whichever variant it is.
pub fn start_index(&self) -> usize {
    let (Operation::Equal { index, .. }
    | Operation::Insert { index, .. }
    | Operation::Delete { index, .. }) = self;

    *index
}
/// Returns the index of the last character affected by the operation, i.e.
/// `start_index() + len() - 1`.
///
/// # Panics
///
/// In debug builds, panics if the operation is empty.
pub fn end_index(&self) -> usize {
    let length = self.len();
    debug_assert!(
        length > 0,
        " len() must be greater than 0 because operations must be non-empty"
    );

    self.start_index() + length - 1
}
/// Returns the half-open range of character indices affected by the
/// operation: from `start_index()` up to and including `end_index()`.
pub fn range(&self) -> Range<usize> {
    Range {
        start: self.start_index(),
        end: self.end_index() + 1,
    }
}
/// Returns how many characters the operation affects: the stored length for
/// equal and delete operations, and the summed original length of all tokens
/// for an insert. The `create_*` constructors never produce an empty
/// operation.
pub fn len(&self) -> usize {
    match self {
        Operation::Equal { length: count, .. }
        | Operation::Delete {
            deleted_character_count: count,
            ..
        } => *count,
        Operation::Insert { text: tokens, .. } => tokens
            .iter()
            .map(Token::get_original_length)
            .sum(),
    }
}
/// Consumes the operation and returns it with its start index replaced by
/// `index`. The variant and every other field stay as they were.
pub fn with_index(mut self, index: usize) -> Self {
    match &mut self {
        Operation::Equal { index: current, .. }
        | Operation::Insert { index: current, .. }
        | Operation::Delete { index: current, .. } => *current = index,
    }

    self
}
/// Consumes the operation and returns it with its start index moved by
/// `offset`. The offset may be negative as long as the resulting index is
/// not.
///
/// # Panics
///
/// In debug mode, panics if the resulting index is negative.
pub fn with_shifted_index(self, offset: i64) -> Self {
    let current = self.start_index() as i64;
    let shifted = current + offset;
    debug_assert!(shifted >= 0, "Shifted index must be non-negative");

    self.with_index(shifted as usize)
}
/// Merges the operation with the given context, producing a new operation
/// and updating the context. This implements a complex FSM that handles
/// the merging of operations in a way that is consistent with the text.
/// The contexts are updated in-place.
///
/// `affecting_context` holds what the other side of the merge did last and
/// is used to shift and adjust this operation. `produced_context` belongs to
/// the side this operation comes from and records the operation that is
/// emitted. Returns `None` when nothing is left of the operation after the
/// adjustment.
#[allow(clippy::too_many_lines)]
pub fn merge_operations_with_context(
self,
affecting_context: &mut MergeContext<T>,
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
// Forget the other side's last operation if this operation starts past it,
// then move this operation by the other side's accumulated shift.
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation()) {
// Insert with no pending insert/delete on the other side: emit it as is
// and account for its length in our own shift.
(operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
produced_context.shift += operation.len() as i64;
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
// Insert following an insert of the other side.
(
Operation::Insert { text, index },
Some(Operation::Insert {
text: previous_inserted_text,
..
}),
) => {
// In case the current insert's prefix appears in the previously inserted text,
// we can trim the current insert to only include the non-overlapping part.
// This way, we don't end up duplicating text.
let offset_in_tokens =
find_longest_prefix_contained_within(previous_inserted_text, &text);
let offset_in_length = text
.iter()
.take(offset_in_tokens)
.map(Token::get_original_length)
.sum::<usize>();
let trimmed_operation =
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
affecting_context.shift -= offset_in_length as i64;
// `trimmed_operation` is `None` when the whole insert was trimmed away.
produced_context.shift += trimmed_operation
.as_ref()
.map(Operation::len)
.unwrap_or_default() as i64;
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
trimmed_operation
}
// Delete with no pending delete on the other side: emit it as is.
(
operation @ Operation::Delete { .. },
None | Some(Operation::Insert { .. } | Operation::Equal { .. }),
) => {
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
// Insert that falls inside a range the other side deleted: the insert is
// moved to the start of that delete and the pending delete is re-created
// right after the moved insert, shortened by the distance moved.
(
operation @ Operation::Insert { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
produced_context.shift += operation.len() as i64;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
affecting_context.replace_last_operation(Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
Some(moved_operation)
}
// Delete that starts inside a range the other side deleted: only the part
// reaching past the end of the other delete is emitted, and the other
// side's pending delete keeps only the part reaching past this one.
(
operation @ Operation::Delete { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
);
affecting_context.replace_last_operation(Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(updated_delete.clone());
updated_delete
}
// Equal that starts inside a range the other side deleted: the overlapping
// characters are dropped from the front of the equal. `create_equal*`
// returns `None` when nothing remains.
(
ref operation @ Operation::Equal {
length,
#[cfg(debug_assertions)]
ref text,
..
},
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
// Number of characters of this equal that are covered by the delete.
let overlap = (length as i64)
.min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
#[cfg(debug_assertions)]
let result = text.as_ref().map_or_else(
|| {
Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
(length as i64 - overlap) as usize,
)
},
|text| {
Operation::create_equal_with_text(
operation.end_index().min(last_delete.end_index()),
text.chars().skip(overlap as usize).collect::<String>(),
)
},
);
#[cfg(not(debug_assertions))]
let result = Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
(length as i64 - overlap) as usize,
);
result
}
// Any other equal passes through; neither context is updated in this arm.
(operation @ Operation::Equal { .. }, _) => Some(operation),
}
}
}
impl<T> Display for Operation<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Writes a compact, human-readable description of the operation, e.g.
    /// `<insert 'text' from index 3>`.
    ///
    /// In builds with debug assertions, equal and delete operations print
    /// their stored text when it is known and fall back to
    /// `N characters` otherwise. Without debug assertions only the counts are
    /// available.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Operation::Equal {
                index,
                length,
                #[cfg(debug_assertions)]
                text,
            } => {
                // The fallback string is built lazily so that nothing is
                // allocated for it when the text is known.
                #[cfg(debug_assertions)]
                write!(
                    f,
                    "<equal {} from index {}>",
                    text.as_ref().map_or_else(
                        || format!("{length} characters"),
                        |text| format!("'{text}'"),
                    ),
                    index
                )?;

                // NOTE(review): unlike the debug fallback above, this output
                // has no "characters" suffix — confirm whether that is
                // intended before unifying the two.
                #[cfg(not(debug_assertions))]
                write!(f, "<equal {length} from index {index}>")?;

                Ok(())
            }
            Operation::Insert { index, text } => {
                write!(
                    f,
                    "<insert '{}' from index {}>",
                    text.iter().map(Token::original).collect::<String>(),
                    index
                )
            }
            Operation::Delete {
                index,
                deleted_character_count,
                #[cfg(debug_assertions)]
                deleted_text,
            } => {
                #[cfg(debug_assertions)]
                write!(
                    f,
                    "<delete {} from index {}>",
                    deleted_text.as_ref().map_or_else(
                        || format!("{deleted_character_count} characters"),
                        |text| format!("'{text}'"),
                    ),
                    index
                )?;

                #[cfg(not(debug_assertions))]
                write!(
                    f,
                    "<delete {deleted_character_count} characters from index {index}>",
                )?;

                Ok(())
            }
        }
    }
}
impl<T> Debug for Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Shifting an index below zero must trip the debug assertion.
    #[test]
    #[should_panic(expected = "Shifted index must be non-negative")]
    fn test_shifting_error() {
        let insert = Operation::create_insert(1, vec!["hi".into()]).unwrap();

        insta::assert_debug_snapshot!(insert.with_shifted_index(-2));
    }

    /// A delete created from its text removes exactly that text.
    #[test]
    fn test_apply_delete_with_create() {
        let delete = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();
        let result = delete.apply(StringBuilder::new("hello world")).build();

        assert_eq!(result, "hello");
    }

    /// An insert at the end of the text appends its tokens.
    #[test]
    fn test_apply_insert() {
        let insert = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
        let result = insert.apply(StringBuilder::new("hello")).build();

        assert_eq!(result, "hello my friend");
    }
}

View file

@ -0,0 +1,48 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::operation_transformation::Operation;
/// An `Operation` paired with the `order` value used to sort operations from
/// two sides when they are merged.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
// Primary sort key; `cook_operations` sets it to the start index of the
// operation on the original text.
pub order: usize,
// The operation itself.
pub operation: Operation<T>,
}
impl<T> OrderedOperation<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Builds the key that operations are ordered by: first `order`, then the
    /// operation's start index, then a variant-specific string that acts as a
    /// tie-breaker.
    pub fn get_sort_key(&self) -> (usize, usize, String) {
        // The tie-breaker keeps the ordering deterministic regardless of
        // which text is left or right.
        let tie_breaker = match &self.operation {
            Operation::Equal { index, .. } => index.to_string(),
            Operation::Insert { text, .. } => text
                .iter()
                .map(crate::tokenizer::token::Token::original)
                .collect::<String>(),
            Operation::Delete {
                deleted_character_count,
                ..
            } => deleted_character_count.to_string(),
        };

        (self.order, self.operation.start_index(), tie_breaker)
    }
}
impl<T> PartialOrd for OrderedOperation<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Orders two operations by comparing their sort keys.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let own_key = self.get_sort_key();
        let other_key = other.get_sort_key();

        Some(own_key.cmp(&other_key))
    }
}

View file

@ -0,0 +1,43 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: <insert 'Hello, my friend!' from index 0>,
},
OrderedOperation {
order: 0,
operation: <delete 'hello world!' from index 17>,
},
OrderedOperation {
order: 12,
operation: <equal ' ' from index 17>,
},
OrderedOperation {
order: 13,
operation: <equal 'How' from index 18>,
},
OrderedOperation {
order: 16,
operation: <equal ' ' from index 21>,
},
OrderedOperation {
order: 17,
operation: <equal 'are' from index 22>,
},
OrderedOperation {
order: 20,
operation: <insert ' you doing? Albert' from index 25>,
},
OrderedOperation {
order: 20,
operation: <delete ' you? Adam' from index 43>,
},
],
cursors: [],
}

View file

@ -0,0 +1,23 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world!",
operations: [
OrderedOperation {
order: 0,
operation: <equal 'hello' from index 0>,
},
OrderedOperation {
order: 5,
operation: <equal ' ' from index 5>,
},
OrderedOperation {
order: 6,
operation: <equal 'world!' from index 6>,
},
],
cursors: [],
}

View file

@ -0,0 +1,61 @@
---
source: reconcile/src/operations/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -0,0 +1,60 @@
---
source: reconcile/src/operations/operation_sequence.rs
expression: operations
snapshot_kind: text
---
EditedText {
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -0,0 +1,2 @@
pub mod cook_operations;
pub mod elongate_operations;

View file

@ -0,0 +1,55 @@
use crate::{
diffs::raw_operation::RawOperation,
operation_transformation::{Operation, ordered_operation::OrderedOperation},
};
/// Converts raw diff operations into `OrderedOperation`s, tracking the
/// position in both the original and the new text along the way.
///
/// Equal and insert operations advance the position in the new text; equal
/// and delete operations advance the position in the original text. Raw
/// operations for which the `Operation::create_*` constructor returns `None`
/// are skipped. In builds with debug assertions, equal and delete operations
/// also carry their original text.
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
where
    I: IntoIterator<Item = RawOperation<T>>,
    T: PartialEq + Clone + std::fmt::Debug,
{
    let mut new_index = 0; // start index of the next operation on the new text
    let mut order = 0; // start index of the next operation on the original text

    raw_operations.into_iter().filter_map(move |raw_operation| {
        let length = raw_operation.original_text_length();
        // Positions at which the current operation starts, captured before
        // the counters are advanced.
        let start = new_index;
        let current_order = order;

        let cooked = match raw_operation {
            RawOperation::Equal(..) => {
                new_index += length;
                order += length;

                if cfg!(debug_assertions) {
                    Operation::create_equal_with_text(start, raw_operation.get_original_text())
                } else {
                    Operation::create_equal(start, length)
                }
            }
            RawOperation::Insert(tokens) => {
                new_index += length;

                Operation::create_insert(start, tokens)
            }
            RawOperation::Delete(..) => {
                order += length;

                if cfg!(debug_assertions) {
                    Operation::create_delete_with_text(start, raw_operation.get_original_text())
                } else {
                    Operation::create_delete(start, length)
                }
            }
        };

        cooked.map(|operation| OrderedOperation {
            order: current_order,
            operation,
        })
    })
}

View file

@ -0,0 +1,127 @@
use core::iter;
use crate::diffs::raw_operation::RawOperation;
/// Elongates the operations by merging adjacent insertions and deletions that
/// can be joined. This makes the subsequent merging of operations more
/// intuitive.
///
/// Between two equal operations, all inserts are emitted before all deletes.
/// An insert (or delete) is merged into the pending insert (or delete) when
/// the pending one is right-joinable and the new one is left-joinable;
/// otherwise the pending one is emitted and the new one becomes pending. An
/// equal operation flushes the pending insert and delete, in that order,
/// before being emitted itself.
pub fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
where
    I: IntoIterator<Item = RawOperation<T>>,
    T: PartialEq + Clone + std::fmt::Debug,
{
    // This might look bad, but this makes sense. The inserts and deletes can be
    // interleaved, such as: IDIDID and we need to turn this into IIIDDD.
    // So we need to keep track of both the last insert and delete operations, not
    // just the last one.
    let mut maybe_previous_insert: Option<RawOperation<T>> = None;
    let mut maybe_previous_delete: Option<RawOperation<T>> = None;

    // A plain loop pushing into the result avoids allocating a boxed iterator
    // for every raw operation.
    let mut result: Vec<RawOperation<T>> = Vec::new();

    for next in raw_operations {
        match next {
            RawOperation::Insert(..) => match maybe_previous_insert.take() {
                Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
                    maybe_previous_insert = Some(prev.extend(next));
                }
                prev => {
                    maybe_previous_insert = Some(next);
                    result.extend(prev);
                }
            },
            RawOperation::Delete(..) => match maybe_previous_delete.take() {
                Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
                    maybe_previous_delete = Some(prev.extend(next));
                }
                prev => {
                    maybe_previous_delete = Some(next);
                    result.extend(prev);
                }
            },
            RawOperation::Equal(..) => {
                result.extend(maybe_previous_insert.take());
                result.extend(maybe_previous_delete.take());
                result.push(next);
            }
        }
    }

    // Flush whatever is still pending, inserts first.
    result.extend(maybe_previous_insert);
    result.extend(maybe_previous_delete);

    result
}
// #[cfg(test)]
// mod tests {
// use super::*;
// #[test]
// fn test_elongate_operations_empty() {
// let operations: Vec<RawOperation<()>> = vec![];
// let result = elongate_operations(operations);
// assert_eq!(result, vec![]);
// }
// #[test]
// fn test_elongate_operations_single_operation() {
// let operations = vec![RawOperation::Insert(vec!["test".into()])];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 1);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// }
// #[test]
// fn test_elongate_operations_interleaved() {
// let operations = vec![
// RawOperation::Insert(vec!["a".into()]),
// RawOperation::Delete(vec!["b".into()]),
// RawOperation::Insert(vec!["c".into()]),
// RawOperation::Delete(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 2);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// assert!(matches!(result[1], RawOperation::Delete(_)));
// }
// #[test]
// fn test_elongate_operations_with_equal() {
// let operations = vec![
// RawOperation::Equal(vec!["a".into()]),
// RawOperation::Equal(vec!["b".into()]),
// RawOperation::Insert(vec!["c".into()]),
// RawOperation::Insert(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 2);
// assert!(matches!(result[0], RawOperation::Equal(_)));
// assert!(matches!(result[1], RawOperation::Insert(_)));
// }
// #[test]
// fn test_elongate_operations_mixed_sequence() {
// let operations = vec![
// RawOperation::Insert(vec!["a".into()]),
// RawOperation::Equal(vec!["b".into()]),
// RawOperation::Delete(vec!["c".into()]),
// RawOperation::Equal(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 4);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// assert!(matches!(result[1], RawOperation::Equal(_)));
// assert!(matches!(result[2], RawOperation::Delete(_)));
// assert!(matches!(result[3], RawOperation::Equal(_)));
// }
// }