Refactor & simplify merging logic #3

Merged
schmelczer merged 15 commits from asch/simplify into main 2025-06-22 15:07:04 +01:00
16 changed files with 481 additions and 698 deletions

View file

@ -41,12 +41,12 @@ lto = true
opt-level = 3
strip="debuginfo" # Keep some info for better panics
[workspace.lints.rust]
[lints.rust]
unsafe_code = "forbid"
rust_2018_idioms = { level = "warn", priority = -1 }
missing_debug_implementations = "warn"
[workspace.lints.clippy]
[lints.clippy]
await_holding_lock = "warn"
dbg_macro = "warn"
empty_enum = "warn"

View file

@ -1,26 +1,22 @@
# VaultLink self-hosted Obsidian plugin for file syncing
[![Check](https://github.com/schmelczer/vault-link/actions/workflows/check.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/check.yml)
[![E2E tests](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml)
[![Publish server Docker image](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml)
[![Publish Obsidian plugin](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml)
[![Check](https://github.com/schmelczer/reconcile/actions/workflows/check.yml/badge.svg)](https://github.com/schmelczer/reconcile/actions/workflows/check.yml)
[![Publish to GitHub Pages](https://github.com/schmelczer/reconcile/actions/workflows/gh-pages.yml/badge.svg)](https://github.com/schmelczer/reconcile/actions/workflows/gh-pages.yml)
## Develop
### Install [nvm](https://github.com/nvm-sh/nvm)
- `curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash`
- `nvm install 22`
- `nvm use 22`
- Optionally set the system-wide default: `nvm alias default 22`
- `curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash`
- `nvm install 22`
- `nvm use 22`
- Optionally set the system-wide default: `nvm alias default 22`
### Set up Rust
- Install [`rustup`](https://rustup.rs): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`
- Install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer): `curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh`
- `cargo install cargo-insta sqlx-cli cargo-edit`
- Install [`rustup`](https://rustup.rs): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`
- Install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer): `curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh`
- `cargo install cargo-insta sqlx-cli cargo-edit`
### Install Obsidian on Linux
@ -45,7 +41,6 @@ scripts/update-api-types.sh
scripts/bump-version.sh patch
```
#### Run E2E tests
```sh
@ -56,4 +51,4 @@ And to clean up the logs & database files, run `scripts/clean-up.sh`
## Projects
- [Sync server](./backend/sync_server/README.md)
- [Sync server](./backend/sync_server/README.md)

View file

@ -1,8 +1,6 @@
mod cursor;
mod edited_text;
mod merge_context;
mod operation;
mod ordered_operation;
mod utils;
pub use cursor::{CursorPosition, TextWithCursors};
@ -111,8 +109,8 @@ mod test {
}, // inside of "s|ample" because "text" got replaced by "sample"
CursorPosition {
id: 3,
char_index: 43
}, // before "cursor movements"
char_index: 30
}, // after "complex sample"
]
)
);

View file

@ -1,12 +1,11 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
use super::{CursorPosition, Operation, TextWithCursors};
use crate::{
diffs::{myers::diff, raw_operation::RawOperation},
operation_transformation::{
merge_context::MergeContext,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
operation_transformation::utils::{
cook_operations::cook_operations, elongate_operations::elongate_operations,
},
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
utils::{side::Side, string_builder::StringBuilder},
@ -31,7 +30,7 @@ where
T: PartialEq + Clone + std::fmt::Debug,
{
text: &'a str,
operations: Vec<OrderedOperation<T>>,
operations: Vec<Operation<T>>,
pub(crate) cursors: Vec<CursorPosition>,
}
@ -77,23 +76,7 @@ where
/// Create a new `EditedText` with the given operations.
/// The operations must be in the order in which they are meant to be
/// applied. The operations must not overlap.
fn new(
text: &'a str,
operations: Vec<OrderedOperation<T>>,
mut cursors: Vec<CursorPosition>,
) -> Self {
operations
.iter()
.zip(operations.iter().skip(1))
.for_each(|(previous, next)| {
debug_assert!(
previous.operation.start_index() <= next.operation.start_index(),
"{} must not come before {} yet it does",
previous.operation,
next.operation
);
});
fn new(text: &'a str, operations: Vec<Operation<T>>, mut cursors: Vec<CursorPosition>) -> Self {
cursors.sort_by_key(|cursor| cursor.char_index);
Self {
@ -110,14 +93,11 @@ where
"`EditedText`-s must be derived from the same text to be mergable"
);
let mut left_merge_context = MergeContext::default();
let mut right_merge_context = MergeContext::default();
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
let mut left_cursors = self.cursors.into_iter().peekable();
let mut right_cursors = other.cursors.into_iter().peekable();
let mut merged_operations: Vec<OrderedOperation<T>> =
let mut merged_operations: Vec<Operation<T>> =
Vec::with_capacity(self.operations.len() + other.operations.len());
let mut left_iter = self.operations.into_iter();
@ -126,89 +106,107 @@ where
let mut maybe_left_op = left_iter.next();
let mut maybe_right_op = right_iter.next();
let mut seen_left_length: usize = 0;
let mut seen_right_length: usize = 0;
let mut merged_length: usize = 0;
let mut last_left_op = None;
let mut last_right_op = None;
loop {
let (side, OrderedOperation { operation, order }) =
let (side, operation, mut last_other_op) =
match (maybe_left_op.clone(), maybe_right_op.clone()) {
(Some(left_op), Some(right_op)) => {
if left_op < right_op {
(Side::Left, left_op)
if left_op
.get_sort_key(seen_left_length)
.partial_cmp(&right_op.get_sort_key(seen_right_length))
== Some(std::cmp::Ordering::Less)
{
(Side::Left, left_op, last_right_op.clone())
} else {
(Side::Right, right_op)
(Side::Right, right_op, last_left_op.clone())
}
}
(Some(left_op), None) => (Side::Left, left_op),
(None, Some(right_op)) => (Side::Right, right_op),
(Some(left_op), None) => (Side::Left, left_op, last_right_op.clone()),
(None, Some(right_op)) => (Side::Right, right_op, last_left_op.clone()),
(None, None) => break,
};
if side == Side::Left {
maybe_left_op = left_iter.next();
} else {
maybe_right_op = right_iter.next();
}
let is_advancing_operation = matches!(
operation,
Operation::Insert { .. } | Operation::Equal { .. }
);
let original_start = operation.start_index() as i64;
let original_end = operation.end_index();
let original_length = operation.len() as i64;
let result = match side {
Side::Left => operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
),
Side::Right => operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
),
Side::Left => {
let result = operation.merge_operations(&mut last_other_op);
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
let shift = merged_length as i64 - seen_left_length as i64
+ op.len() as i64
- original_length;
while let Some(cursor) = left_cursors.next_if(|cursor| {
cursor.char_index <= seen_left_length + original_length as usize
}) {
merged_cursors.push(
cursor.with_index((cursor.char_index as i64 + shift) as usize),
);
}
}
if is_advancing_operation {
seen_left_length += original_length as usize;
}
maybe_left_op = left_iter.next();
last_left_op = Some(result.clone());
result
}
Side::Right => {
let result = operation.merge_operations(&mut last_other_op);
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
let shift = merged_length as i64 - seen_right_length as i64
+ op.len() as i64
- original_length;
while let Some(cursor) = right_cursors.next_if(|cursor| {
cursor.char_index <= seen_right_length + original_length as usize
}) {
merged_cursors.push(
cursor.with_index((cursor.char_index as i64 + shift) as usize),
);
}
}
if is_advancing_operation {
seen_right_length += original_length as usize;
}
maybe_right_op = right_iter.next();
last_right_op = Some(result.clone());
result
}
};
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result {
let shift =
op.start_index() as i64 - original_start + op.len() as i64 - original_length;
match side {
Side::Left => {
while let Some(cursor) =
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
{
merged_cursors.push(cursor.with_index(
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
as usize,
));
}
}
Side::Right => {
while let Some(cursor) =
right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
{
merged_cursors.push(cursor.with_index(
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
as usize,
));
}
}
}
if result.len() == 0 {
continue;
}
merged_operations.extend(result.into_iter().map(|op| OrderedOperation {
order,
operation: op,
}));
if is_advancing_operation {
merged_length += result.len();
}
merged_operations.push(result);
}
let last_index = merged_operations
.iter()
.filter(|operation| {
matches!(
operation.operation,
Operation::Insert { .. } | Operation::Equal { .. }
)
})
.next_back()
.map_or(0, |op| op.operation.end_index());
for cursor in left_cursors.chain(right_cursors) {
merged_cursors.push(cursor.with_index(last_index));
merged_cursors.push(cursor.with_index(merged_length));
}
Self::new(self.text, merged_operations, merged_cursors)
@ -219,7 +217,7 @@ where
pub fn apply(&self) -> String {
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for OrderedOperation { operation, .. } in &self.operations {
for operation in &self.operations {
builder = operation.apply(builder);
}
@ -229,8 +227,6 @@ where
#[cfg(test)]
mod tests {
use std::env;
use insta::assert_debug_snapshot;
use pretty_assertions::assert_eq;

View file

@ -1,73 +0,0 @@
use core::fmt::Debug;
use crate::operation_transformation::Operation;
#[derive(Clone, Debug)]
pub struct MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
last_operation: Option<Operation<T>>,
pub shift: i64,
}
impl<T> Default for MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn default() -> Self {
MergeContext {
last_operation: None,
shift: 0,
}
}
}
impl<T> MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
self.last_operation = operation;
}
/// Replace the last delete operation (if there was one) with a new one
/// while applying it to the `shift` in case the last operation
/// was a delete.
pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
if let Some(Operation::Delete {
deleted_character_count,
..
}) = self.last_operation.take()
{
self.shift -= deleted_character_count as i64;
}
self.last_operation = operation;
}
/// Remove the last operation (if there was one) in case it is behind the
/// threshold operation. This updates the `shift` in case the last operation
/// was a delete.
pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
if let Some(last_operation) = self.last_operation.as_ref() {
if let Operation::Delete {
deleted_character_count,
..
} = last_operation
{
if threshold_index + self.shift > last_operation.end_index() as i64 {
self.shift -= *deleted_character_count as i64;
self.last_operation = None;
}
} else if let Operation::Insert { .. } = last_operation
&& threshold_index + self.shift - last_operation.len() as i64
> last_operation.end_index() as i64
{
self.last_operation = None;
}
}
}
}

View file

@ -1,10 +1,8 @@
use core::fmt::{Debug, Display};
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::{
Token,
utils::{
@ -21,7 +19,7 @@ where
T: PartialEq + Clone + std::fmt::Debug,
{
Equal {
index: usize,
order: usize,
length: usize,
#[cfg(debug_assertions)]
@ -29,12 +27,12 @@ where
},
Insert {
index: usize,
order: usize,
text: Vec<Token<T>>,
},
Delete {
index: usize,
order: usize,
deleted_character_count: usize,
#[cfg(debug_assertions)]
@ -49,74 +47,83 @@ where
/// Creates an equal operation with the given index.
/// This operation is used to indicate that the text at the given index
/// is unchanged.
pub fn create_equal(index: usize, length: usize) -> Option<Self> {
if length == 0 {
return None;
}
Some(Operation::Equal {
index,
pub fn create_equal(order: usize, length: usize) -> Self {
Operation::Equal {
order,
length,
#[cfg(debug_assertions)]
text: None,
})
}
}
pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
if text.is_empty() {
return None;
}
Some(Operation::Equal {
index,
pub fn create_equal_with_text(order: usize, text: String) -> Self {
Operation::Equal {
order,
length: text.chars().count(),
#[cfg(debug_assertions)]
text: Some(text),
})
}
}
/// Creates an insert operation with the given index and text.
/// If the text is empty (meaning that the operation would be a no-op),
/// returns None.
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
if text.is_empty() {
return None;
}
Some(Operation::Insert { index, text })
pub fn create_insert(order: usize, text: Vec<Token<T>>) -> Self {
Operation::Insert { order, text }
}
/// Creates a delete operation with the given index and number of
/// to-be-deleted characters. If the operation would delete 0 (meaning
/// that the operation would be a no-op), returns None.
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
if deleted_character_count == 0 {
return None;
}
Some(Operation::Delete {
index,
/// to-be-deleted characters.
pub fn create_delete(order: usize, deleted_character_count: usize) -> Self {
Operation::Delete {
order,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text: None,
})
}
}
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
if text.is_empty() {
return None;
}
Some(Operation::Delete {
index,
pub fn create_delete_with_text(order: usize, text: String) -> Self {
Operation::Delete {
order,
deleted_character_count: text.chars().count(),
#[cfg(debug_assertions)]
deleted_text: Some(text),
})
}
}
fn order(&self) -> usize {
match self {
Operation::Equal { order, .. }
| Operation::Insert { order, .. }
| Operation::Delete { order, .. } => *order,
}
}
pub fn get_sort_key(&self, insertion_index: usize) -> (usize, usize, usize, String) {
(
self.order(),
match self {
Operation::Delete { .. } => 1,
Operation::Insert { .. } => 2,
Operation::Equal { .. } => 3,
},
insertion_index,
// Make sure that the ordering is deterministic regardless of which text
// is left or right.
match self {
Operation::Equal { length, .. } => length.to_string(),
Operation::Insert { text, .. } => {
text.iter().map(Token::original).collect::<String>()
}
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
)
}
/// Applies the operation to the given `StringBuilder`, returning the
@ -129,63 +136,47 @@ where
Operation::Equal {
#[cfg(debug_assertions)]
text,
length,
..
} => {
#[cfg(debug_assertions)]
debug_assert!(
text.as_ref()
.is_none_or(|text| builder.get_slice(self.range()) == *text),
"Text which is supposed to be equal does not match the text in the range"
.is_none_or(|text| builder.get_slice_from_remaining(self.len()) == *text),
"Text (`{}`) which is supposed to be equal does not match the text in the \
range: `{}`",
text.as_ref().unwrap_or(&String::new()),
builder.get_slice_from_remaining(self.len())
);
return builder;
builder.retain(*length);
}
Operation::Insert { text, .. } => {
builder.insert(&text.iter().map(Token::original).collect::<String>());
}
Operation::Insert { text, .. } => builder.insert(
self.start_index(),
&text.iter().map(Token::original).collect::<String>(),
),
Operation::Delete {
#[cfg(debug_assertions)]
deleted_text,
deleted_character_count,
..
} => {
#[cfg(debug_assertions)]
debug_assert!(
deleted_text
.as_ref()
.is_none_or(|text| builder.get_slice(self.range()) == *text),
"Text to delete does not match the text in the range"
.is_none_or(|text| builder.get_slice_from_remaining(self.len()) == *text),
"Text to-be-deleted `{}` does not match the text in the range: `{}`",
deleted_text.as_ref().unwrap_or(&String::new()),
builder.get_slice_from_remaining(self.len())
);
builder.delete(self.range());
builder.delete(*deleted_character_count);
}
}
builder
}
/// Returns the index of the first character that the operation affects.
pub fn start_index(&self) -> usize {
match self {
Operation::Equal { index, .. }
| Operation::Insert { index, .. }
| Operation::Delete { index, .. } => *index,
}
}
/// Returns the index of the last character that the operation affects.
pub fn end_index(&self) -> usize {
debug_assert!(
self.len() > 0,
" len() must be greater than 0 because operations must be non-empty"
);
self.start_index() + self.len() - 1
}
/// Returns the range of indices of characters that the operation affects.
#[allow(clippy::range_plus_one)]
pub fn range(&self) -> Range<usize> { self.start_index()..self.end_index() + 1 }
/// Returns the number of affected characters. It is always greater than 0
/// because empty operations cannot be created.
pub fn len(&self) -> usize {
@ -199,76 +190,17 @@ where
}
}
/// Creates a new operation with the same type and text but with the given
/// index.
pub fn with_index(self, index: usize) -> Self {
match self {
Operation::Equal {
length,
#[cfg(debug_assertions)]
text,
..
} => Operation::Equal {
index,
length,
#[cfg(debug_assertions)]
text,
},
Operation::Insert { text, .. } => Operation::Insert { index, text },
Operation::Delete {
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
..
} => Operation::Delete {
index,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
},
}
}
/// Creates a new operation with the same type and text but with the index
/// shifted by the given offset. The offset can be negative but the
/// resulting index must be non-negative.
///
/// # Panics
///
/// In debug mode, panics if the resulting index is negative.
pub fn with_shifted_index(self, offset: i64) -> Self {
let index = self.start_index() as i64 + offset;
debug_assert!(index >= 0, "Shifted index must be non-negative");
self.with_index(index as usize)
}
/// Merges the operation with the given context, producing a new operation
/// and updating the context. This implements a comples FSM that handles
/// the merging of operations in a way that is consistent with the text.
/// The contexts are updated in-place.
#[allow(clippy::too_many_lines)]
pub fn merge_operations_with_context(
self,
affecting_context: &mut MergeContext<T>,
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation()) {
(operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
produced_context.shift += operation.len() as i64;
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
pub fn merge_operations(self, previous_operation: &mut Option<Self>) -> Operation<T> {
let operation = self;
match (operation, previous_operation) {
(
Operation::Insert { text, index },
Operation::Insert { order, text },
Some(Operation::Insert {
text: previous_inserted_text,
..
@ -279,129 +211,113 @@ where
// This way, we don't end up duplicating text.
let offset_in_tokens =
find_longest_prefix_contained_within(previous_inserted_text, &text);
let offset_in_length = text
.iter()
.take(offset_in_tokens)
.map(Token::get_original_length)
.sum::<usize>();
let trimmed_operation =
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
affecting_context.shift -= offset_in_length as i64;
produced_context.shift += trimmed_operation
.as_ref()
.map(Operation::len)
.unwrap_or_default() as i64;
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
trimmed_operation
Operation::create_insert(order, text[offset_in_tokens..].to_vec())
}
(
operation @ Operation::Delete { .. },
None | Some(Operation::Insert { .. } | Operation::Equal { .. }),
) => {
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
Operation::Delete {
order,
deleted_character_count,
(
operation @ Operation::Insert { .. },
Some(last_delete @ Operation::Delete { .. }),
#[cfg(debug_assertions)]
deleted_text,
},
Some(Operation::Delete {
order: last_delete_order,
deleted_character_count: last_delete_deleted_character_count,
..
}),
) => {
produced_context.shift += operation.len() as i64;
let operation_end_index = order + deleted_character_count;
let last_delete_end_index =
*last_delete_order + *last_delete_deleted_character_count;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
let new_length = deleted_character_count
.min(0.max(operation_end_index as i64 - last_delete_end_index as i64) as usize);
let overlap = deleted_character_count - new_length;
#[cfg(debug_assertions)]
let updated_delete = deleted_text.as_ref().map_or_else(
|| Operation::create_delete(order + overlap, new_length),
|text| {
Operation::create_delete_with_text(
order + overlap,
text.chars()
.skip(deleted_character_count - new_length)
.collect::<String>(),
)
},
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
affecting_context.replace_last_operation(Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
Some(moved_operation)
}
(
operation @ Operation::Delete { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
);
affecting_context.replace_last_operation(Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(updated_delete.clone());
#[cfg(not(debug_assertions))]
let updated_delete = Operation::create_delete(order + overlap, new_length);
updated_delete
}
(
ref operation @ Operation::Equal {
Operation::Equal {
order,
length,
#[cfg(debug_assertions)]
ref text,
..
},
Some(last_delete @ Operation::Delete { .. }),
Some(Operation::Delete {
order: last_delete_order,
deleted_character_count: last_delete_deleted_character_count,
..
}),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let last_delete_end_index =
*last_delete_order + *last_delete_deleted_character_count;
let overlap = (length as i64)
.min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
let overlap =
0.max((length as i64).min(last_delete_end_index as i64 - order as i64));
#[cfg(debug_assertions)]
let result = text.as_ref().map_or_else(
let updated_equal = text.as_ref().map_or_else(
|| {
Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
order + overlap as usize,
(length as i64 - overlap) as usize,
)
},
|text| {
Operation::create_equal_with_text(
operation.end_index().min(last_delete.end_index()),
order + overlap as usize,
text.chars().skip(overlap as usize).collect::<String>(),
)
},
);
#[cfg(not(debug_assertions))]
let result = Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
let updated_equal = Operation::create_equal(
order + overlap as usize,
(length as i64 - overlap) as usize,
);
result
updated_equal
}
(operation @ Operation::Equal { .. }, _) => Some(operation),
(
ref operation @ Operation::Equal { ref order, .. },
Some(Operation::Equal {
order: last_equal_order,
length: last_equal_length,
..
}),
) => {
if operation.len() == *last_equal_length && *order == *last_equal_order {
Operation::create_equal(*order, 0)
} else {
operation.clone()
}
}
(operation, _) => operation,
}
}
}
@ -413,7 +329,7 @@ where
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Operation::Equal {
index,
order,
length,
#[cfg(debug_assertions)]
@ -422,28 +338,29 @@ where
#[cfg(debug_assertions)]
write!(
f,
"<equal {} from index {}>",
"<equal {} from {order}>",
text.as_ref()
.map(|text| format!("'{text}'"))
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{length} characters")),
index
)?;
#[cfg(not(debug_assertions))]
write!(f, "<equal {length} from index {index}>")?;
write!(f, "<equal {length} from {order}>")?;
Ok(())
}
Operation::Insert { index, text } => {
Operation::Insert { order, text } => {
write!(
f,
"<insert '{}' from index {}>",
text.iter().map(Token::original).collect::<String>(),
index
"<insert '{}' at {order}>",
text.iter()
.map(Token::original)
.collect::<String>()
.replace('\n', "\\n"),
)
}
Operation::Delete {
index,
order,
deleted_character_count,
#[cfg(debug_assertions)]
@ -452,18 +369,17 @@ where
#[cfg(debug_assertions)]
write!(
f,
"<delete {} from index {}>",
"<delete {} from {order}>",
deleted_text
.as_ref()
.map(|text| format!("'{text}'"))
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{deleted_character_count} characters")),
index
)?;
#[cfg(not(debug_assertions))]
write!(
f,
"<delete {deleted_character_count} characters from index {index}>",
"<delete {deleted_character_count} characters from {order}>",
)?;
Ok(())
@ -485,29 +401,28 @@ mod tests {
use super::*;
#[test]
#[should_panic(expected = "Shifted index must be non-negative")]
fn test_shifting_error() {
insta::assert_debug_snapshot!(
Operation::create_insert(1, vec!["hi".into()])
.unwrap()
.with_shifted_index(-2)
);
}
#[test]
fn test_apply_delete_with_create() {
let builder = StringBuilder::new("hello world");
let operation = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();
let delete_operation = Operation::<()>::create_delete_with_text(0, "hello ".to_owned());
let retain_operation = Operation::<()>::create_equal(6, 5);
assert_eq!(operation.apply(builder).build(), "hello");
let mut builder = delete_operation.apply(builder);
builder = retain_operation.apply(builder);
assert_eq!(builder.build(), "world");
}
#[test]
fn test_apply_insert() {
let builder = StringBuilder::new("hello");
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
assert_eq!(operation.apply(builder).build(), "hello my friend");
let retain_operation = Operation::<()>::create_equal(0, 5);
let insert_operation = Operation::create_insert(5, vec![" my friend".into()]);
let mut builder = retain_operation.apply(builder);
builder = insert_operation.apply(builder);
assert_eq!(builder.build(), "hello my friend");
}
}

View file

@ -1,48 +0,0 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::operation_transformation::Operation;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub order: usize,
pub operation: Operation<T>,
}
impl<T> OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn get_sort_key(&self) -> (usize, usize, String) {
(
self.order,
self.operation.start_index(),
// Make sure that the ordering is deterministic regardless of which text
// is left or right.
match &self.operation {
Operation::Equal { index, .. } => index.to_string(),
Operation::Insert { text, .. } => text
.iter()
.map(crate::tokenizer::token::Token::original)
.collect::<String>(),
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
)
}
}
impl<T> PartialOrd for OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.get_sort_key().partial_cmp(&other.get_sort_key())
}
}

View file

@ -1,43 +1,19 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
source: src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: <insert 'Hello, my friend!' from index 0>,
},
OrderedOperation {
order: 0,
operation: <delete 'hello world!' from index 17>,
},
OrderedOperation {
order: 12,
operation: <equal ' ' from index 17>,
},
OrderedOperation {
order: 13,
operation: <equal 'How' from index 18>,
},
OrderedOperation {
order: 16,
operation: <equal ' ' from index 21>,
},
OrderedOperation {
order: 17,
operation: <equal 'are' from index 22>,
},
OrderedOperation {
order: 20,
operation: <insert ' you doing? Albert' from index 25>,
},
OrderedOperation {
order: 20,
operation: <delete ' you? Adam' from index 43>,
},
<delete 'hello world!' from 0>,
<insert 'Hello, my friend!' at 12>,
<equal ' ' from 12>,
<equal 'How' from 13>,
<equal ' ' from 16>,
<equal 'are' from 17>,
<delete ' you? Adam' from 20>,
<insert ' you doing? Albert' at 31>,
],
cursors: [],
}

View file

@ -1,23 +1,14 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
source: src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world!",
operations: [
OrderedOperation {
order: 0,
operation: <equal 'hello' from index 0>,
},
OrderedOperation {
order: 5,
operation: <equal ' ' from index 5>,
},
OrderedOperation {
order: 6,
operation: <equal 'world!' from index 6>,
},
<equal 'hello' from 0>,
<equal ' ' from 5>,
<equal 'world!' from 6>,
],
cursors: [],
}

View file

@ -1,52 +1,44 @@
use crate::{
diffs::raw_operation::RawOperation,
operation_transformation::{Operation, ordered_operation::OrderedOperation},
};
use crate::{diffs::raw_operation::RawOperation, operation_transformation::Operation};
/// Turn raw operations into ordered operations while keeping track of old & new
/// indexes.
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
/// Turn raw operations into ordered operations while keeping track of the
/// original token's indexes.
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = Operation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
T: PartialEq + Clone + std::fmt::Debug,
{
let mut new_index = 0; // this is the start index of the operation on the new text
let mut order = 0; // this is the start index of the operation on the original text
let mut original_text_index = 0; // this is the start index of the operation on the original text
raw_operations.into_iter().filter_map(move |raw_operation| {
raw_operations.into_iter().map(move |raw_operation| {
let length = raw_operation.original_text_length();
match raw_operation {
RawOperation::Equal(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_equal_with_text(new_index, raw_operation.get_original_text())
Operation::create_equal_with_text(
original_text_index,
raw_operation.get_original_text(),
)
} else {
Operation::create_equal(new_index, length)
}
.map(|operation| OrderedOperation { order, operation });
Operation::create_equal(original_text_index, length)
};
new_index += length;
order += length;
op
}
RawOperation::Insert(tokens) => {
let op = Operation::create_insert(new_index, tokens)
.map(|operation| OrderedOperation { order, operation });
new_index += length;
original_text_index += length;
op
}
RawOperation::Insert(tokens) => Operation::create_insert(original_text_index, tokens),
RawOperation::Delete(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_delete_with_text(new_index, raw_operation.get_original_text())
Operation::create_delete_with_text(
original_text_index,
raw_operation.get_original_text(),
)
} else {
Operation::create_delete(new_index, length)
}
.map(|operation| OrderedOperation { order, operation });
Operation::create_delete(original_text_index, length)
};
order += length;
original_text_index += length;
op
}

View file

@ -17,6 +17,8 @@ where
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
// We don't elongate `equals` as they're needed to maintain cursor positions
// when merging against deletes.
let mut result: Vec<RawOperation<T>> = raw_operations
.into_iter()
.flat_map(|next| match next {
@ -41,87 +43,80 @@ where
}
},
RawOperation::Equal(..) => Box::new(
maybe_previous_insert
maybe_previous_delete
.take()
.into_iter()
.chain(maybe_previous_delete.take())
.chain(maybe_previous_insert.take())
.chain(iter::once(next)),
) as Box<dyn Iterator<Item = RawOperation<T>>>,
})
.collect();
if let Some(prev) = maybe_previous_insert {
if let Some(prev) = maybe_previous_delete {
result.push(prev);
}
if let Some(prev) = maybe_previous_delete {
if let Some(prev) = maybe_previous_insert {
result.push(prev);
}
result
}
// #[cfg(test)]
// mod tests {
#[cfg(test)]
mod tests {
use super::*;
use crate::tokenizer::token::Token;
// use super::*;
// Helper constructors for cleaner tests
fn ins(texts: &[&str]) -> RawOperation<String> {
RawOperation::Insert(texts.iter().map(|t| Token::from(*t)).collect())
}
// #[test]
// fn test_elongate_operations_empty() {
// let operations: Vec<RawOperation<()>> = vec![];
// let result = elongate_operations(operations);
// assert_eq!(result, vec![]);
// }
fn del(texts: &[&str]) -> RawOperation<String> {
RawOperation::Delete(texts.iter().map(|t| Token::from(*t)).collect())
}
// #[test]
// fn test_elongate_operations_single_operation() {
// let operations = vec![RawOperation::Insert(vec!["test".into()])];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 1);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// }
fn ins_custom(text: &str, lj: bool, rj: bool) -> RawOperation<String> {
RawOperation::Insert(vec![Token::new(text.to_string(), text.to_string(), lj, rj)])
}
// #[test]
// fn test_elongate_operations_interleaved() {
// let operations = vec![
// RawOperation::Insert(vec!["a".into()]),
// RawOperation::Delete(vec!["b".into()]),
// RawOperation::Insert(vec!["c".into()]),
// RawOperation::Delete(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 2);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// assert!(matches!(result[1], RawOperation::Delete(_)));
// }
#[test]
fn merges_adjacent_joinable_inserts() {
let ops = vec![ins(&["a"]), ins(&["b"]), ins(&["c"])];
let result = elongate_operations(ops);
assert_eq!(result.len(), 1);
match &result[0] {
RawOperation::Insert(tokens) => {
let originals: String = tokens.iter().map(|t| t.original()).collect();
assert_eq!(originals, "abc");
}
_ => panic!("Expected single Insert operation"),
}
}
// #[test]
// fn test_elongate_operations_with_equal() {
// let operations = vec![
// RawOperation::Equal(vec!["a".into()]),
// RawOperation::Equal(vec!["b".into()]),
// RawOperation::Insert(vec!["c".into()]),
// RawOperation::Insert(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 2);
// assert!(matches!(result[0], RawOperation::Equal(_)));
// assert!(matches!(result[1], RawOperation::Insert(_)));
// }
#[test]
fn does_not_merge_when_not_joinable() {
let ops = vec![
ins_custom("a", true, false), // not right-joinable
ins_custom("b", true, true), // left-joinable but previous isn't right-joinable
];
let result = elongate_operations(ops);
assert_eq!(
result.len(),
2,
"Operations should remain separate when not joinable"
);
}
// #[test]
// fn test_elongate_operations_mixed_sequence() {
// let operations = vec![
// RawOperation::Insert(vec!["a".into()]),
// RawOperation::Equal(vec!["b".into()]),
// RawOperation::Delete(vec!["c".into()]),
// RawOperation::Equal(vec!["d".into()]),
// ];
// let result = elongate_operations(operations);
// assert_eq!(result.len(), 4);
// assert!(matches!(result[0], RawOperation::Insert(_)));
// assert!(matches!(result[1], RawOperation::Equal(_)));
// assert!(matches!(result[2], RawOperation::Delete(_)));
// assert!(matches!(result[3], RawOperation::Equal(_)));
// }
// }
#[test]
fn merges_interleaved_insert_delete_sequences() {
// Pattern IDID -> II DD
let ops = vec![ins(&["i1"]), del(&["d1"]), ins(&["i2"]), del(&["d2"])];
let result = elongate_operations(ops);
assert_eq!(result.len(), 2);
assert!(matches!(result[0], RawOperation::Delete(_)));
assert!(matches!(result[1], RawOperation::Insert(_)));
}
}

View file

@ -9,20 +9,20 @@ use crate::Token;
/// old: [0, 1, 9, 0, 2, 5]
/// new: [9, 0, 2, 5, 1]
/// ```
/// > results in an length of 4
/// > results in a length of 4
///
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 2]
/// ```
/// > results in an length of 2
/// > results in a length of 2
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 4]
/// ```
/// > results in an length of 1
/// > results in a length of 1
pub fn find_longest_prefix_contained_within<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
where
T: PartialEq + Clone + std::fmt::Debug,

View file

@ -1,83 +1,67 @@
use core::ops::Range;
use std::iter::Iterator;
/// A helper for building a string in order based on an original string and a
/// series of insertions and deletions applied to it. It is safe to use with
/// UTF-8 strings as all operations are based on character indices.
#[derive(Debug, Clone)]
/// A helper for building a string in-order based on an original string and a
/// series of insertions, deletions, and copies applied to it. It is safe to use
/// with UTF-8 strings as all operations are based on character indices. The
/// methods must be called in-order.
pub struct StringBuilder<'a> {
original: &'a str,
last_old_char_index: usize,
original: Box<dyn Iterator<Item = char> + 'a>,
buffer: String,
#[cfg(debug_assertions)]
remaining: String,
}
impl StringBuilder<'_> {
pub fn new(original: &str) -> StringBuilder<'_> {
StringBuilder {
original,
last_old_char_index: 0,
original: Box::new(original.chars()),
buffer: String::with_capacity(original.len()),
#[cfg(debug_assertions)]
remaining: original.to_owned(),
}
}
/// Insert a string at the given index after copying the original string up
/// to that index from the last insertion or deletion.
pub fn insert(&mut self, from: usize, text: &str) {
self.copy_until(from);
self.buffer.push_str(text);
/// Insert a string at the end of the built buffer.
pub fn insert(&mut self, text: &str) { self.buffer.push_str(text); }
/// Skip copying `length` characters from the original string to the built
/// buffer.
pub fn delete(&mut self, length: usize) {
if length == 0 {
return;
}
self.original.nth(length - 1);
if cfg!(debug_assertions) {
self.remaining = self.remaining.chars().skip(length).collect();
}
}
/// Delete a string at the given index after copying the original string up
/// to that index from the last insertion or deletion.
pub fn delete(&mut self, range: core::ops::Range<usize>) {
self.copy_until(range.start);
self.last_old_char_index += range.len();
}
/// Copy `length` characters from the original string to the built buffer.
pub fn retain(&mut self, length: usize) {
self.buffer.extend(self.original.by_ref().take(length));
fn copy_until(&mut self, index: usize) {
let current_char_count = self.buffer.chars().count();
debug_assert!(
index >= current_char_count,
"String builder only support building in order"
);
let jump = index - current_char_count;
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.take(jump)
.collect::<String>(),
);
self.last_old_char_index += jump;
if cfg!(debug_assertions) {
self.remaining = self.remaining.chars().skip(length).collect();
}
}
/// Finish building the string after copying the remaining original string
/// since the last insertion or deletion.
pub fn build(mut self) -> String {
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.collect::<String>(),
);
pub fn build(self) -> String { self.buffer }
self.buffer
}
#[cfg(debug_assertions)]
/// Get a slice of the remaining original string. The slice starts from
/// where the next delete/retain operation would start and is of length
/// `length`. The implementation is quite suboptimal but it's only used
/// for debugging.
pub fn get_slice_from_remaining(&self, length: usize) -> String {
let result = self.remaining.chars().take(length).collect::<String>();
#[allow(dead_code)]
pub fn get_slice(&self, range: Range<usize>) -> String {
let result = self
.buffer
.chars()
.chain(self.original.chars().skip(self.last_old_char_index))
.skip(range.start)
.take(range.end - range.start)
.collect::<String>();
debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",);
debug_assert_eq!(result.chars().count(), length, "Range out of bounds");
result
}
@ -85,6 +69,8 @@ impl StringBuilder<'_> {
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
@ -92,20 +78,74 @@ mod tests {
let original = "aaa bbb ccc";
let mut builder = StringBuilder::new(original);
builder.insert(0, "ddd ");
builder.delete(4..8);
builder.insert(11, " eee");
builder.insert("ddd");
builder.delete(3);
builder.retain(8);
builder.insert(" eee");
assert_eq!(builder.build(), "ddd bbb ccc eee");
}
#[test]
fn test_string_builder2() {
let original = "abcde";
let mut builder = StringBuilder::new(original);
builder.delete(1..4);
builder.retain(1);
builder.delete(3);
builder.retain(1);
assert_eq!(builder.build(), "ae");
}
#[test]
fn test_empty_original() {
let original = "";
let mut builder = StringBuilder::new(original);
builder.insert("test");
assert_eq!(builder.build(), "test");
}
#[test]
fn test_unicode_characters() {
let original = "こんにちは";
let mut builder = StringBuilder::new(original);
builder.retain(3);
builder.insert("世界, "); // Insert "World, "
builder.retain(2);
assert_eq!(builder.build(), "こんに世界, ちは");
}
#[test]
fn test_get_slice() {
let original = "abcdef";
let builder = StringBuilder::new(original);
// Test getting a slice of the original string
assert_eq!(builder.get_slice_from_remaining(3), "abc");
// Test getting a slice that includes both buffer and remaining original
let mut builder = StringBuilder::new(original);
builder.retain(2); // "ab" in buffer
assert_eq!(builder.get_slice_from_remaining(2), "cd");
}
#[test]
fn test_retain_all() {
let original = "Hello, world!";
let mut builder = StringBuilder::new(original);
builder.retain(original.len());
assert_eq!(builder.build(), original);
}
#[test]
fn test_delete_all() {
let original = "Hello";
let mut builder = StringBuilder::new(original);
builder.delete(original.len());
builder.insert("Hi");
assert_eq!(builder.build(), "Hi");
}
}

View file

@ -24,6 +24,12 @@ left: long small
right: long with big and small
expected: long small
---
parent: long run of text where one barely has no changes but has cursors
left: long| run of tex|t where one barely has no |changes but has |cursors
right: long run one barely has no changes cursors
expected: long| ru|n one barely has no |changes |cursors
---
parent: long text where the cursor has to be clamped after delete
left: long text where the cursor has to be clamped after delete|

View file

@ -2,7 +2,7 @@
parent: original_1 original_2 original_3
left: original_1 edit_1| original_3
right: original_1 original_2| edit_2
expected: original_1 edit_1|| edit_2
expected: original_1| edit_1| edit_2
---
# Both replace the same token with the same value

View file

@ -43,7 +43,7 @@ expected: Send the |detailed |quarterly |detailed report to the |entire |team
parent: Ready, Set go
left: Ready! Set go|
right: Ready, Set, go!|
expected: Ready! Set, go!||
expected: Ready!| Set, go!|
---
parent: "Total: $100"
@ -67,7 +67,7 @@ expected: market| placemarket|space
parent: A B C D
left: A X B D|
right: A B Y|
expected: A X B |Y|
expected: A X B| Y|
---
parent: Please submit your assignment by Friday