Compare commits
8 commits
main
...
asch/recon
| Author | SHA1 | Date | |
|---|---|---|---|
| c6261d9fc1 | |||
| 85454f9f01 | |||
| 44697164a0 | |||
| df5079efea | |||
| 46b52b7aff | |||
| 2dc0ada1fb | |||
| 1038f9cee0 | |||
| 744decb92f |
17 changed files with 359 additions and 317 deletions
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
|
|
@ -5,5 +5,6 @@
|
|||
"**/dist": true,
|
||||
"**/node_modules": true,
|
||||
"**/.sqlx": true,
|
||||
"**/snapshots": true,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ use crate::{
|
|||
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
|
||||
};
|
||||
|
||||
/// Myers' diff algorithm with deadline.
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices
|
||||
/// `new_range`.
|
||||
|
|
|
|||
|
|
@ -30,12 +30,12 @@ where
|
|||
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
|
||||
first_token.is_none_or(|token| token.is_left_joinable)
|
||||
}
|
||||
|
||||
pub fn is_right_joinable(&self) -> bool {
|
||||
let last_token = self.tokens().last();
|
||||
last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
|
||||
last_token.is_none_or(|token| token.is_right_joinable)
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation. Only operations of the
|
||||
|
|
@ -49,8 +49,8 @@ where
|
|||
);
|
||||
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
|
||||
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
|
||||
(RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
|
||||
RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
|
||||
}
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
|
||||
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
|
||||
|
|
|
|||
|
|
@ -7,4 +7,4 @@ pub use operation_transformation::{
|
|||
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
|
||||
reconcile_with_tokenizer,
|
||||
};
|
||||
pub use tokenizer::{Tokenizer, token::Token};
|
||||
pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer};
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ mod edited_text;
|
|||
mod merge_context;
|
||||
mod operation;
|
||||
mod ordered_operation;
|
||||
mod utils;
|
||||
|
||||
pub use cursor::{CursorPosition, TextWithCursors};
|
||||
pub use edited_text::EditedText;
|
||||
|
|
|
|||
|
|
@ -1,23 +1,29 @@
|
|||
use core::iter;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
|
||||
use crate::{
|
||||
diffs::{myers::diff, raw_operation::RawOperation},
|
||||
operation_transformation::merge_context::MergeContext,
|
||||
operation_transformation::{
|
||||
merge_context::MergeContext,
|
||||
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
||||
},
|
||||
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
|
||||
utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
|
||||
utils::{side::Side, string_builder::StringBuilder},
|
||||
};
|
||||
|
||||
/// A sequence of operations that can be applied to a text document.
|
||||
/// `EditedText` supports merging two sequences of operations using the
|
||||
/// principle of Operational Transformation.
|
||||
/// A text document and a sequence of operations that can be applied to the text
|
||||
/// document. `EditedText` supports merging two sequences of operations using
|
||||
/// the principles of Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the `from_strings` method, then merged with
|
||||
/// another `EditedText` derived from the same original text and then applied to
|
||||
/// the original text to get the reconciled text of concurrent edits.
|
||||
///
|
||||
/// In addition to text and operations, it also keeps track of cursor positions
|
||||
/// in the original text. The cursor positions are updated when the operations
|
||||
/// are applied, so that the cursor positions can be used to restore the
|
||||
/// cursor positions in the updated text.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
|
|
@ -63,123 +69,11 @@ where
|
|||
|
||||
Self::new(
|
||||
original,
|
||||
Self::cook_operations(Self::elongate_operations(diff)).collect(),
|
||||
cook_operations(elongate_operations(diff)).collect(),
|
||||
updated.cursors,
|
||||
)
|
||||
}
|
||||
|
||||
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
// This might look bad, but this makes sense. The inserts and deltes can be
|
||||
// interleaved, such as: IDIDID and we need to turn this into IIIDDD.
|
||||
// So we need to keep track of both the last insert and delete operations, not
|
||||
// just the last one.
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => match maybe_previous_insert.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_insert = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_insert = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Delete(..) => match maybe_previous_delete.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_delete = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_delete = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Equal(..) => Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(iter::once(next)),
|
||||
)
|
||||
as Box<dyn Iterator<Item = RawOperation<T>>>,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// Turn raw operations into ordered operations while keeping track of old & new
|
||||
// indexes.
|
||||
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
let mut new_index = 0; // this is the start index of the operation on the new text
|
||||
let mut order = 0; // this is the start index of the operation on the original text
|
||||
|
||||
raw_operations.into_iter().filter_map(move |raw_operation| {
|
||||
let length = raw_operation.original_text_length();
|
||||
|
||||
match raw_operation {
|
||||
RawOperation::Equal(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_equal_with_text(
|
||||
new_index,
|
||||
raw_operation.get_original_text(),
|
||||
)
|
||||
} else {
|
||||
Operation::create_equal(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Insert(tokens) => {
|
||||
let op = Operation::create_insert(new_index, tokens)
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_delete_with_text(
|
||||
new_index,
|
||||
raw_operation.get_original_text(),
|
||||
)
|
||||
} else {
|
||||
Operation::create_delete(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new `EditedText` with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be
|
||||
/// applied. The operations must not overlap.
|
||||
|
|
@ -223,82 +117,84 @@ where
|
|||
let mut left_cursors = self.cursors.into_iter().peekable();
|
||||
let mut right_cursors = other.cursors.into_iter().peekable();
|
||||
|
||||
let merged_operations: Vec<OrderedOperation<T>> = self
|
||||
.operations
|
||||
.into_iter()
|
||||
// The current text is always the left; the other operation is the right side.
|
||||
.map(|op| (op, Side::Left))
|
||||
.merge_sorted_by_key(
|
||||
other.operations.into_iter().map(|op| (op, Side::Right)),
|
||||
|(operation, _)| {
|
||||
(
|
||||
operation.order,
|
||||
operation.operation.start_index(),
|
||||
// Make sure that the ordering is deterministic regardless which text
|
||||
// is left or right.
|
||||
match &operation.operation {
|
||||
Operation::Equal { index, .. } => index.to_string(),
|
||||
Operation::Insert { text, .. } => text
|
||||
.iter()
|
||||
.map(crate::tokenizer::token::Token::original)
|
||||
.collect::<String>(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => deleted_character_count.to_string(),
|
||||
},
|
||||
)
|
||||
},
|
||||
)
|
||||
.flat_map(|(OrderedOperation { order, operation }, side)| {
|
||||
let original_start = operation.start_index() as i64;
|
||||
let original_end = operation.end_index();
|
||||
let original_length = operation.len() as i64;
|
||||
let mut merged_operations: Vec<OrderedOperation<T>> =
|
||||
Vec::with_capacity(self.operations.len() + other.operations.len());
|
||||
|
||||
let result = match side {
|
||||
Side::Left => operation.merge_operations_with_context(
|
||||
&mut right_merge_context,
|
||||
&mut left_merge_context,
|
||||
),
|
||||
Side::Right => operation.merge_operations_with_context(
|
||||
&mut left_merge_context,
|
||||
&mut right_merge_context,
|
||||
),
|
||||
let mut left_iter = self.operations.into_iter();
|
||||
let mut right_iter = other.operations.into_iter();
|
||||
|
||||
let mut maybe_left_op = left_iter.next();
|
||||
let mut maybe_right_op = right_iter.next();
|
||||
|
||||
loop {
|
||||
let (side, OrderedOperation { operation, order }) =
|
||||
match (maybe_left_op.clone(), maybe_right_op.clone()) {
|
||||
(Some(left_op), Some(right_op)) => {
|
||||
if left_op < right_op {
|
||||
(Side::Left, left_op)
|
||||
} else {
|
||||
(Side::Right, right_op)
|
||||
}
|
||||
}
|
||||
|
||||
(Some(left_op), None) => (Side::Left, left_op),
|
||||
(None, Some(right_op)) => (Side::Right, right_op),
|
||||
(None, None) => break,
|
||||
};
|
||||
|
||||
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result
|
||||
{
|
||||
let shift = op.start_index() as i64 - original_start + op.len() as i64
|
||||
- original_length;
|
||||
match side {
|
||||
Side::Left => {
|
||||
while let Some(cursor) =
|
||||
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
if side == Side::Left {
|
||||
maybe_left_op = left_iter.next();
|
||||
} else {
|
||||
maybe_right_op = right_iter.next();
|
||||
}
|
||||
|
||||
let original_start = operation.start_index() as i64;
|
||||
let original_end = operation.end_index();
|
||||
let original_length = operation.len() as i64;
|
||||
|
||||
let result = match side {
|
||||
Side::Left => operation.merge_operations_with_context(
|
||||
&mut right_merge_context,
|
||||
&mut left_merge_context,
|
||||
),
|
||||
Side::Right => operation.merge_operations_with_context(
|
||||
&mut left_merge_context,
|
||||
&mut right_merge_context,
|
||||
),
|
||||
};
|
||||
|
||||
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result {
|
||||
let shift =
|
||||
op.start_index() as i64 - original_start + op.len() as i64 - original_length;
|
||||
match side {
|
||||
Side::Left => {
|
||||
while let Some(cursor) =
|
||||
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
Side::Right => {
|
||||
while let Some(cursor) = right_cursors
|
||||
.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
Side::Right => {
|
||||
while let Some(cursor) =
|
||||
right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
.map(|operation| OrderedOperation { order, operation })
|
||||
.into_iter()
|
||||
})
|
||||
.collect();
|
||||
merged_operations.extend(result.into_iter().map(|op| OrderedOperation {
|
||||
order,
|
||||
operation: op,
|
||||
}));
|
||||
}
|
||||
|
||||
let last_index = merged_operations
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -12,3 +12,37 @@ where
|
|||
pub order: usize,
|
||||
pub operation: Operation<T>,
|
||||
}
|
||||
|
||||
impl<T> OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn get_sort_key(&self) -> (usize, usize, String) {
|
||||
(
|
||||
self.order,
|
||||
self.operation.start_index(),
|
||||
// Make sure that the ordering is deterministic regardless of which text
|
||||
// is left or right.
|
||||
match &self.operation {
|
||||
Operation::Equal { index, .. } => index.to_string(),
|
||||
Operation::Insert { text, .. } => text
|
||||
.iter()
|
||||
.map(crate::tokenizer::token::Token::original)
|
||||
.collect::<String>(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => deleted_character_count.to_string(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> PartialOrd for OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.get_sort_key().partial_cmp(&other.get_sort_key())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,19 +16,7 @@ EditedText {
|
|||
},
|
||||
OrderedOperation {
|
||||
order: 12,
|
||||
operation: <equal ' ' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 13,
|
||||
operation: <equal 'How' from index 18>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 16,
|
||||
operation: <equal ' ' from index 21>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 17,
|
||||
operation: <equal 'are' from index 22>,
|
||||
operation: <equal ' How are' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 20,
|
||||
|
|
|
|||
|
|
@ -8,15 +8,7 @@ EditedText {
|
|||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <equal 'hello' from index 0>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 5,
|
||||
operation: <equal ' ' from index 5>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 6,
|
||||
operation: <equal 'world!' from index 6>,
|
||||
operation: <equal 'hello world!' from index 0>,
|
||||
},
|
||||
],
|
||||
cursors: [],
|
||||
|
|
|
|||
2
backend/reconcile/src/operation_transformation/utils.rs
Normal file
2
backend/reconcile/src/operation_transformation/utils.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod cook_operations;
|
||||
pub mod elongate_operations;
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
use crate::{
|
||||
diffs::raw_operation::RawOperation,
|
||||
operation_transformation::{Operation, ordered_operation::OrderedOperation},
|
||||
};
|
||||
|
||||
/// Turn raw operations into ordered operations while keeping track of old & new
|
||||
/// indexes.
|
||||
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let mut new_index = 0; // this is the start index of the operation on the new text
|
||||
let mut order = 0; // this is the start index of the operation on the original text
|
||||
|
||||
raw_operations.into_iter().filter_map(move |raw_operation| {
|
||||
let length = raw_operation.original_text_length();
|
||||
|
||||
match raw_operation {
|
||||
RawOperation::Equal(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_equal_with_text(new_index, raw_operation.get_original_text())
|
||||
} else {
|
||||
Operation::create_equal(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Insert(tokens) => {
|
||||
let op = Operation::create_insert(new_index, tokens)
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_delete_with_text(new_index, raw_operation.get_original_text())
|
||||
} else {
|
||||
Operation::create_delete(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
use core::iter;
|
||||
|
||||
use crate::diffs::raw_operation::RawOperation;
|
||||
|
||||
/// Elongates the operations by merging adjacent insertions and deletions that
|
||||
/// can be joined. This makes the subsequent merging of operations more
|
||||
/// intuitive.
|
||||
pub fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
// This might look bad, but this makes sense. The inserts and deletes can be
|
||||
// interleaved, such as: IDIDID and we need to turn this into IIIDDD.
|
||||
// So we need to keep track of both the last insert and delete operations, not
|
||||
// just the last one.
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
// Equals can't be interleaved with inserts and deletes
|
||||
let mut maybe_previous_equal: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => match maybe_previous_insert.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_insert = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_insert = Some(next);
|
||||
Box::new(
|
||||
maybe_previous_equal
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(prev.into_iter()),
|
||||
) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
},
|
||||
RawOperation::Delete(..) => match maybe_previous_delete.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_delete = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_delete = Some(next);
|
||||
Box::new(
|
||||
maybe_previous_equal
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(prev.into_iter()),
|
||||
) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
},
|
||||
RawOperation::Equal(..) => match maybe_previous_equal.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_equal = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_equal = Some(next);
|
||||
Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(prev.into_iter()),
|
||||
) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_equal {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_elongate_operations_empty() {
|
||||
let operations: Vec<RawOperation<()>> = vec![];
|
||||
let result = elongate_operations(operations);
|
||||
assert_eq!(result, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_elongate_operations_single_operation() {
|
||||
let operations = vec![RawOperation::Insert(vec!["test".into()])];
|
||||
let result = elongate_operations(operations);
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_elongate_operations_interleaved() {
|
||||
let operations = vec![
|
||||
RawOperation::Insert(vec!["a".into()]),
|
||||
RawOperation::Delete(vec!["b".into()]),
|
||||
RawOperation::Insert(vec!["c".into()]),
|
||||
RawOperation::Delete(vec!["d".into()]),
|
||||
];
|
||||
let result = elongate_operations(operations);
|
||||
assert_eq!(result.len(), 2);
|
||||
assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
assert!(matches!(result[1], RawOperation::Delete(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_elongate_operations_with_equal() {
|
||||
let operations = vec![
|
||||
RawOperation::Equal(vec!["a".into()]),
|
||||
RawOperation::Equal(vec!["b".into()]),
|
||||
RawOperation::Insert(vec!["c".into()]),
|
||||
RawOperation::Insert(vec!["d".into()]),
|
||||
];
|
||||
let result = elongate_operations(operations);
|
||||
assert_eq!(result.len(), 2);
|
||||
assert!(matches!(result[0], RawOperation::Equal(_)));
|
||||
assert!(matches!(result[1], RawOperation::Insert(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_elongate_operations_mixed_sequence() {
|
||||
let operations = vec![
|
||||
RawOperation::Insert(vec!["a".into()]),
|
||||
RawOperation::Equal(vec!["b".into()]),
|
||||
RawOperation::Delete(vec!["c".into()]),
|
||||
RawOperation::Equal(vec!["d".into()]),
|
||||
];
|
||||
let result = elongate_operations(operations);
|
||||
assert_eq!(result.len(), 4);
|
||||
assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
assert!(matches!(result[1], RawOperation::Equal(_)));
|
||||
assert!(matches!(result[2], RawOperation::Delete(_)));
|
||||
assert!(matches!(result[3], RawOperation::Equal(_)));
|
||||
}
|
||||
}
|
||||
|
|
@ -3,4 +3,5 @@ use token::Token;
|
|||
pub mod token;
|
||||
pub mod word_tokenizer;
|
||||
|
||||
/// A trait for tokenizers that take a string and return a list of tokens.
|
||||
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A token is a string that has been normalised in some way.
|
||||
/// The normalised form is used for comparison, while the original form is used
|
||||
/// for applying `Operation`-s.
|
||||
///
|
||||
/// A token consists of the normalised form is used for comparison, and the
|
||||
/// original form used for subsequently applying `Operation`-s to a text
|
||||
/// document.
|
||||
///
|
||||
/// It's UTF-8 compatible.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
|
|
@ -11,18 +15,20 @@ where
|
|||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// The normalised form of the token used deriving the diff.
|
||||
pub normalised: T,
|
||||
normalised: T,
|
||||
|
||||
/// The original string, that should be inserted or deleted in the document.
|
||||
original: String,
|
||||
|
||||
/// Whether the token is joinable with the previous token.
|
||||
is_left_joinable: bool,
|
||||
/// Whether the token is semantically joinable with the previous token.
|
||||
pub is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is joinable with the next token.
|
||||
is_right_joinable: bool,
|
||||
/// Whether the token is semantically joinable with the next token.
|
||||
pub is_right_joinable: bool,
|
||||
}
|
||||
|
||||
/// Trivial implementation of Token when the normalised form is the same as the
|
||||
/// original string.
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
}
|
||||
|
|
@ -47,13 +53,11 @@ where
|
|||
|
||||
pub fn original(&self) -> &str { &self.original }
|
||||
|
||||
pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
|
||||
|
||||
pub fn normalised(&self) -> &T { &self.normalised }
|
||||
|
||||
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
|
||||
pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
|
||||
|
||||
pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits on word boundaries creating alternating words and whitespaces with
|
||||
/// the whitesspaces getting unique IDs.
|
||||
/// Splits text on word boundaries creating tokens of alternating words and
|
||||
/// whitespaces with the whitespaces getting unique IDs.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
|
|
@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
|||
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
let normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalised(normalised);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
pub mod common_prefix_len;
|
||||
pub mod common_suffix_len;
|
||||
pub mod find_longest_prefix_contained_within;
|
||||
pub mod merge_iters;
|
||||
pub mod side;
|
||||
pub mod string_builder;
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
use core::{cmp::Ordering, iter::Peekable};
|
||||
|
||||
pub struct MergeAscending<L, R, F, O>
|
||||
where
|
||||
L: Iterator<Item = R::Item>,
|
||||
R: Iterator,
|
||||
F: Fn(&R::Item) -> O,
|
||||
O: PartialOrd,
|
||||
{
|
||||
left: Peekable<L>,
|
||||
right: Peekable<R>,
|
||||
get_key: F,
|
||||
}
|
||||
|
||||
impl<L, R, F, O> MergeAscending<L, R, F, O>
|
||||
where
|
||||
L: Iterator<Item = R::Item>,
|
||||
R: Iterator,
|
||||
F: Fn(&R::Item) -> O,
|
||||
O: PartialOrd,
|
||||
{
|
||||
fn new(left: L, right: R, get_key: F) -> Self {
|
||||
MergeAscending {
|
||||
left: left.peekable(),
|
||||
right: right.peekable(),
|
||||
get_key,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<L, R, F, O> Iterator for MergeAscending<L, R, F, O>
|
||||
where
|
||||
L: Iterator<Item = R::Item>,
|
||||
R: Iterator,
|
||||
F: Fn(&R::Item) -> O,
|
||||
O: PartialOrd,
|
||||
{
|
||||
type Item = L::Item;
|
||||
|
||||
fn next(&mut self) -> Option<L::Item> {
|
||||
let order = match (self.left.peek(), self.right.peek()) {
|
||||
(Some(l), Some(r)) => (self.get_key)(l).partial_cmp(&(self.get_key)(r)),
|
||||
(Some(_), None) => Some(Ordering::Less),
|
||||
(None, Some(_)) => Some(Ordering::Greater),
|
||||
(None, None) => return None,
|
||||
};
|
||||
|
||||
match order {
|
||||
Some(Ordering::Less | Ordering::Equal) | None => self.left.next(),
|
||||
Some(Ordering::Greater) => self.right.next(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait MergeSorted: Iterator {
|
||||
fn merge_sorted_by_key<R, F, O>(self, other: R, get_key: F) -> MergeAscending<Self, R, F, O>
|
||||
where
|
||||
Self: Sized,
|
||||
R: Iterator<Item = Self::Item>,
|
||||
F: Fn(&Self::Item) -> O,
|
||||
O: PartialOrd,
|
||||
{
|
||||
MergeAscending::new(self, other, get_key)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: ?Sized> MergeSorted for T where T: Iterator {}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_merge_sorted_by_key() {
|
||||
let left = [9, 7, 5, 3, 1];
|
||||
let right = [7, 6, 5, 4, 3];
|
||||
|
||||
let result: Vec<i32> = left
|
||||
.into_iter()
|
||||
.merge_sorted_by_key(right.into_iter(), |x| -1 * x)
|
||||
.collect();
|
||||
assert_eq!(result, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue