Compact diffs

This commit is contained in:
Andras Schmelczer 2024-11-24 22:32:17 +00:00
parent 331e264399
commit 8b8733c1e3
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
3 changed files with 129 additions and 181 deletions

View file

@ -3,10 +3,14 @@ use crate::diffs::raw_operation::RawOperation;
use crate::errors::SyncLibError;
use crate::operation_transformation::merge_context::MergeContext;
use crate::tokenizer::token::Token;
use crate::tokenizer::word_tokenizer::word_tokenizer;
use crate::utils::ordered_operation::OrderedOperation;
use crate::utils::side::Side;
use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted};
use ropey::Rope;
use std::hash::Hash;
use std::iter;
use std::path::Iter;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -29,121 +33,129 @@ impl<'a> EditedText<'a> {
/// Create an EditedText from the given original (old) and updated (new) strings.
/// The returned EditedText represents the changes from the original to the updated text.
/// When the return value is applied to the original text, it will result in the updated text.
/// The default word tokenizer is used to tokenize the text which splits the text on whitespaces.
pub fn from_strings(original: &'a str, updated: &str) -> Self {
let original_tokens = Token::tokenize(original);
let updated_tokens = Token::tokenize(updated);
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
}
let diff: Vec<RawOperation> = diff(&original_tokens, &updated_tokens);
/// Create an EditedText from the given original (old) and updated (new) strings.
/// The returned EditedText represents the changes from the original to the updated text.
/// When the return value is applied to the original text, it will result in the updated text.
/// The tokenizer function is used to tokenize the text.
pub fn from_strings_with_tokenizer<F, T>(
original: &'a str,
updated: &str,
tokenizer: &F,
) -> Self
where
F: Fn(&str) -> Vec<Token<T>>,
T: PartialEq + Hash + Clone,
{
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(updated);
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
Self::new(
original,
Self::elongate_operations(Self::cook_operations(diff)),
// Self::cook_operations(diff),
Self::cook_operations(Self::elongate_operations(diff.into_iter())).collect(),
)
}
// Turn raw operations into ordered operations while keeping track of old & new indexes.
fn cook_operations(raw_operations: Vec<RawOperation>) -> Vec<OrderedOperation> {
fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation>
where
I: IntoIterator<Item = RawOperation<T>>,
T: PartialEq + Hash + Clone,
{
let mut new_index = 0; // this is the start index of the operation on the new text
let mut order = 0; // this is the start index of the operation on the original text
raw_operations
.into_iter()
.flat_map(|raw_operation| {
let length = raw_operation.original_text_length();
raw_operations.into_iter().flat_map(move |raw_operation| {
let length = raw_operation.original_text_length();
let operation = match raw_operation {
RawOperation::Equal(..) => {
new_index += length;
order += length;
let operation = match raw_operation {
RawOperation::Equal(..) => {
new_index += length;
order += length;
None
}
RawOperation::Insert(..) => {
let op =
Operation::create_insert(new_index, raw_operation.get_original_text())
.map(|operation| OrderedOperation { order, operation });
new_index += length;
op
}
RawOperation::Delete(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_delete_with_text(
new_index,
raw_operation.get_original_text(),
)
} else {
Operation::create_delete(new_index, length)
}
.map(|operation| OrderedOperation { order, operation });
order += length;
op
}
};
operation.into_iter()
})
.collect()
}
// TODO: shift ops befor compacting
fn elongate_operations(operations: Vec<OrderedOperation>) -> Vec<OrderedOperation> {
let mut maybe_previous: Option<OrderedOperation> = None;
let mut result: Vec<OrderedOperation> = operations
.into_iter()
.flat_map(|next| {
if let Some(previous) = maybe_previous.take() {
match (previous, next) {
(
previous @ OrderedOperation {
operation: Operation::Insert { .. },
..
},
next @ OrderedOperation {
operation: Operation::Insert { .. },
..
},
) if previous.operation.end_index() + 1 == next.operation.start_index() => {
maybe_previous = Some(OrderedOperation {
order: previous.order,
operation: previous.operation.extend(&next.operation),
});
None
}
(
previous @ OrderedOperation {
operation: Operation::Delete { .. },
..
},
next @ OrderedOperation {
operation: Operation::Delete { .. },
..
},
) if previous.operation.start_index() == next.operation.start_index() => {
maybe_previous = Some(OrderedOperation {
order: previous.order,
operation: previous.operation.extend(&next.operation),
});
None
}
(previous, next) => {
maybe_previous = Some(next);
Some(previous)
}
}
} else {
maybe_previous = Some(next.clone());
None
}
.into_iter()
RawOperation::Insert(..) => {
let op = Operation::create_insert(new_index, raw_operation.get_original_text())
.map(|operation| OrderedOperation { order, operation });
new_index += length;
op
}
RawOperation::Delete(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_delete_with_text(
new_index,
raw_operation.get_original_text(),
)
} else {
Operation::create_delete(new_index, length)
}
.map(|operation| OrderedOperation { order, operation });
order += length;
op
}
};
operation.into_iter()
})
}
fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
T: PartialEq + Hash + Clone,
{
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
let mut result: Vec<RawOperation<T>> = raw_operations
.into_iter()
.flat_map(|next| match next {
RawOperation::Insert(..) => {
if let Some(prev) = maybe_previous_insert.take() {
maybe_previous_insert = prev.extend(next);
} else {
maybe_previous_insert = Some(next);
}
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
RawOperation::Delete(..) => {
if let Some(prev) = maybe_previous_delete.take() {
maybe_previous_delete = prev.extend(next);
} else {
maybe_previous_delete = Some(next);
}
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
RawOperation::Equal(..) => Box::new(
maybe_previous_insert
.take()
.into_iter()
.chain(maybe_previous_delete.take())
.chain(iter::once(next)),
)
as Box<dyn Iterator<Item = RawOperation<T>>>,
})
.collect();
if let Some(prev) = maybe_previous {
if let Some(prev) = maybe_previous_insert {
result.push(prev);
}
if let Some(prev) = maybe_previous_delete {
result.push(prev);
}
@ -257,20 +269,20 @@ mod tests {
assert_eq!(new_right.to_string(), text);
}
// #[test]
// fn test_calculate_operations_with_insert() {
// let original = "hello world! ...";
// let left = "Hello world! How are you?";
// let right = "hello world! I'm Andras.";
// let expected = "Hello world! I'm Andras. How are you?";
#[test]
fn test_calculate_operations_with_insert() {
let original = "hello world! ...";
let left = "hello world! I'm Andras.";
let right = "Hello world! How are you?";
let expected = "Hello world! I'm Andras.How are you?";
// let operations_1 = EditedText::from_strings(original, left);
// println!("{:#?}", operations_1);
// let operations_2 = EditedText::from_strings(original, right);
// println!("{:#?}", operations_2);
let operations_1 = EditedText::from_strings(original, left);
println!("{:#?}", operations_1);
let operations_2 = EditedText::from_strings(original, right);
println!("{:#?}", operations_2);
// let operations = operations_1.merge(operations_2);
let operations = operations_1.merge(operations_2);
// assert_eq!(operations.apply().unwrap(), expected);
// }
assert_eq!(operations.apply().unwrap(), expected);
}
}

View file

@ -251,70 +251,6 @@ impl Operation {
}
}
}
/// Merges the operation with another operation that is consequtive to this operation.
/// The other operation must start where this operation ends.
/// The two operations must be of the same type, otherwise panics.
pub fn extend(self, other: &Self) -> Self {
match (self, other) {
(
Operation::Insert { index, text },
Operation::Insert {
text: other_text, ..
},
) => {
let end_index = index + text.chars().count();
debug_assert!(
end_index == other.start_index(),
"Cannot merge non-consequtive inserts with index {} and {}",
end_index,
other.start_index()
);
Operation::Insert {
index,
text: text + other_text,
}
}
(
Operation::Delete {
index,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
},
Operation::Delete {
index: other_index,
deleted_character_count: other_deleted_character_count,
#[cfg(debug_assertions)]
deleted_text: other_deleted_text,
},
) => {
debug_assert!(
index == *other_index,
"Cannot merge non-consequtive deletes",
);
Operation::Delete {
index,
deleted_character_count: deleted_character_count
+ other_deleted_character_count,
#[cfg(debug_assertions)]
deleted_text: deleted_text
.into_iter()
.flat_map(|t1| other_deleted_text.as_ref().map(|t2| t1 + t2).into_iter())
.last(),
}
}
(this, other) => panic!(
"Cannot merge operations of different type: {:?} and {:?}",
&this, &other
),
}
}
}
impl Display for Operation {

View file

@ -25,20 +25,20 @@ EditedText {
},
OrderedOperation {
order: 21,
operation: Delete {
operation: Insert {
index: 26,
text: "you doing? Albert",
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 43,
deleted_character_count: 10,
deleted_text: Some(
"you? Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you doing? Albert",
},
},
],
}