Merge inserts with inserts

This commit is contained in:
Andras Schmelczer 2024-11-29 22:20:42 +00:00
parent 730038e0d0
commit 3303a31cc4
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
7 changed files with 155 additions and 73 deletions

View file

@ -197,7 +197,14 @@ where
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| operation.order,
|(operation, _)| {
(
operation.order,
// Operations on left and right must come in the same order so that
// inserts can be merged with other inserts and deletes with deletes.
matches!(operation.operation, Operation::Delete { .. }) as usize,
)
},
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
match side {
@ -272,7 +279,7 @@ mod tests {
#[test]
fn test_calculate_operations_with_insert() {
let original = "hello world! ...";
let left = "hello world! I'm Andras.";
let left = "Hello world! I'm Andras.";
let right = "Hello world! How are you?";
let expected = "Hello world! I'm Andras.How are you?";
@ -282,7 +289,6 @@ mod tests {
println!("{:#?}", operations_2);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply().unwrap(), expected);
}
}

View file

@ -1,6 +1,8 @@
use std::fmt::Debug;
use crate::operation_transformation::Operation;
#[derive(Debug, Clone)]
#[derive(Clone)]
pub struct MergeContext<T>
where
T: PartialEq + Clone,
@ -21,6 +23,18 @@ where
}
}
impl<T> Debug for MergeContext<T>
where
T: PartialEq + Clone,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("MergeContext")
.field("last_operation", &self.last_operation)
.field("shift", &self.shift)
.finish()
}
}
impl<T> MergeContext<T>
where
T: PartialEq + Clone,
@ -55,18 +69,24 @@ where
threshold_operation: &Operation<T>,
) {
if let Some(last_operation) = self.last_operation.as_ref() {
if threshold_operation.start_index() as i64 + self.shift
> last_operation.end_index() as i64
if let Operation::Delete {
deleted_character_count,
..
} = last_operation
{
if let Operation::Delete {
deleted_character_count,
..
} = last_operation
if threshold_operation.start_index() as i64 + self.shift
> last_operation.end_index() as i64
{
self.shift -= *deleted_character_count as i64;
self.last_operation = None;
}
} else if let Operation::Insert { .. } = last_operation {
if threshold_operation.start_index() as i64 + self.shift
- last_operation.len() as i64
> last_operation.end_index() as i64
{
self.last_operation = None;
}
self.last_operation = None;
}
}
}

View file

@ -56,7 +56,7 @@ mod test {
"original_1 original_2 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 edit_1 original_3",
"original_1 edit_1 original_3",
);
// One deleted a large range, the other deleted subranges and inserted as well
@ -120,7 +120,7 @@ mod test {
"both delete the same word but one a bit more",
"both the same word",
"both same word",
"both same wordword",
"both same word",
);
test_merge_both_ways(

View file

@ -1,7 +1,8 @@
use ropey::Rope;
use std::fmt::Display;
use crate::utils::find_common_overlap::find_common_overlap;
use crate::{errors::SyncLibError, Token};
use ropey::Rope;
use std::fmt::Debug;
use std::fmt::Display;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -12,7 +13,7 @@ use super::merge_context::MergeContext;
/// Operation is tied to a ropey::Rope and is mainly expected to be
/// created by EditedText.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
#[derive(Clone, PartialEq)]
pub enum Operation<T>
where
T: PartialEq + Clone,
@ -205,22 +206,43 @@ where
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
affecting_context.consume_last_operation_if_it_is_too_behind(&self);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation().clone()) {
match (operation, affecting_context.last_operation()) {
(operation @ Operation::Insert { .. }, None) => {
produced_context.shift += operation.len() as i64;
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
(operation, Some(last_insert @ Operation::Insert { .. })) => {
produced_context.shift += operation.len() as i64;
Some(operation)
(
Operation::Insert { text, index },
Some(Operation::Insert {
text: previous_inserted_text,
..
}),
) => {
let offset_in_tokens = find_common_overlap(previous_inserted_text, &text);
let trimmed_length_in_tokens = previous_inserted_text.len() - offset_in_tokens;
let trimmed_length = previous_inserted_text
.iter()
.skip(offset_in_tokens)
.map(|token| token.get_original_length())
.sum::<usize>();
let trimmed_operation =
Operation::create_insert(index, text[trimmed_length_in_tokens..].to_vec());
affecting_context.shift -= trimmed_length as i64;
produced_context.shift += trimmed_operation
.as_ref()
.map(|op| op.len())
.unwrap_or_default() as i64;
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
trimmed_operation
}
// We can never delete inside an insert
(operation @ Operation::Delete { .. }, None) => {
(operation @ Operation::Delete { .. }, None | Some(Operation::Insert { .. })) => {
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
@ -246,6 +268,8 @@ where
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
Some(moved_operation)
}
@ -321,6 +345,15 @@ where
}
}
impl<T> Debug for Operation<T>
where
T: PartialEq + Clone,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self)
}
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -8,63 +8,19 @@ EditedText {
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: [
Token {
normalised: "Hello, ",
original: "Hello, ",
},
Token {
normalised: "my ",
original: "my ",
},
Token {
normalised: "friend! ",
original: "friend! ",
},
],
},
operation: <insert 'Hello, my friend! ' from index 0>,
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
operation: <delete 'hello world! ' from index 18>,
},
OrderedOperation {
order: 21,
operation: Insert {
index: 26,
text: [
Token {
normalised: "you ",
original: "you ",
},
Token {
normalised: "doing? ",
original: "doing? ",
},
Token {
normalised: "Albert",
original: "Albert",
},
],
},
operation: <insert 'you doing? Albert' from index 26>,
},
OrderedOperation {
order: 21,
operation: Delete {
index: 43,
deleted_character_count: 10,
deleted_text: Some(
"you? Adam",
),
},
operation: <delete 'you? Adam' from index 43>,
},
],
}

View file

@ -0,0 +1,66 @@
use crate::Token;
/// Given two lists of tokens, returns the offset in the first (old) list from which the two lists have the same tokens until the end of the first list.
/// Thus, the suffix of the old list from the offset to the end is equal to a prefix of the new list.
///
/// If there is no overlap, the function returns the maxmium offset, the length of the old list.
///
/// ## Example
/// ```
/// old: [0, 1, 9, 0, 2, 5]
/// new: [9, 0, 2, 5, 1]
/// ```
/// > results in an offset of 2
pub fn find_common_overlap<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
where
T: PartialEq + Clone,
{
let minimum_offset = old.len().saturating_sub(new.len());
for offset in minimum_offset..old.len() {
if old.iter().skip(offset).zip(new.iter()).all(|(a, b)| a == b) {
return offset;
}
}
old.len()
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn test_common_overlap() {
assert_eq!(find_common_overlap(&["".into()], &["".into()]), 0);
assert_eq!(
find_common_overlap(
&["a".into(), "b".into(), "c".into()],
&["b".into(), "c".into(), "a".into()]
),
1
);
assert_eq!(
find_common_overlap(
&["a".into(), "a".into(), "a".into()],
&["a".into(), "b".into(), "c".into()]
),
2
);
assert_eq!(
find_common_overlap(
&["a".into(), "b".into(), "c".into()],
&["d".into(), "e".into(), "a".into()]
),
3
);
assert_eq!(
find_common_overlap(&["a".into(), "a".into()], &["a".into()]),
1
);
}
}

View file

@ -1,5 +1,6 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_common_overlap;
pub mod merge_iters;
pub mod ordered_operation;
pub mod side;