diff --git a/Cargo.toml b/Cargo.toml index 272e1bf..4adaf56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,12 +41,12 @@ lto = true opt-level = 3 strip="debuginfo" # Keep some info for better panics -[workspace.lints.rust] +[lints.rust] unsafe_code = "forbid" rust_2018_idioms = { level = "warn", priority = -1 } missing_debug_implementations = "warn" -[workspace.lints.clippy] +[lints.clippy] await_holding_lock = "warn" dbg_macro = "warn" empty_enum = "warn" diff --git a/README.md b/README.md index 4735cf5..a3d3d7f 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,22 @@ # VaultLink self-hosted Obsidian plugin for file syncing -[![Check](https://github.com/schmelczer/vault-link/actions/workflows/check.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/check.yml) -[![E2E tests](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml) -[![Publish server Docker image](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml) -[![Publish Obsidian plugin](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml) - +[![Check](https://github.com/schmelczer/reconcile/actions/workflows/check.yml/badge.svg)](https://github.com/schmelczer/reconcile/actions/workflows/check.yml) +[![Publish to GitHub Pages](https://github.com/schmelczer/reconcile/actions/workflows/gh-pages.yml/badge.svg)](https://github.com/schmelczer/reconcile/actions/workflows/gh-pages.yml) ## Develop ### Install [nvm](https://github.com/nvm-sh/nvm) -- `curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash` -- `nvm install 22` -- `nvm use 22` -- Optionally set the system-wide default: `nvm alias default 22` - +- `curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash` +- `nvm install 22` +- `nvm use 22` +- Optionally set the system-wide default: `nvm alias default 22` ### Set up Rust -- Install [`rustup`](https://rustup.rs): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` -- Install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer): `curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh` -- `cargo install cargo-insta sqlx-cli cargo-edit` +- Install [`rustup`](https://rustup.rs): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` +- Install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer): `curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh` +- `cargo install cargo-insta sqlx-cli cargo-edit` ### Install Obsidian on Linux @@ -45,7 +41,6 @@ scripts/update-api-types.sh scripts/bump-version.sh patch ``` - #### Run E2E tests ```sh @@ -56,4 +51,4 @@ And to clean up the logs & database files, run `scripts/clean-up.sh` ## Projects -- [Sync server](./backend/sync_server/README.md) +- [Sync server](./backend/sync_server/README.md) diff --git a/src/operation_transformation.rs b/src/operation_transformation.rs index fc280ec..777c2f0 100644 --- a/src/operation_transformation.rs +++ b/src/operation_transformation.rs @@ -1,8 +1,6 @@ mod cursor; mod edited_text; -mod merge_context; mod operation; -mod ordered_operation; mod utils; pub use cursor::{CursorPosition, TextWithCursors}; @@ -111,8 +109,8 @@ mod test { }, // inside of "s|ample" because "text" got replaced by "sample" CursorPosition { id: 3, - char_index: 43 - }, // before "cursor movements" + char_index: 30 + }, // after "complex sample" ] ) ); diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs index bd9a15d..4e9c7be 100644 --- a/src/operation_transformation/edited_text.rs +++ b/src/operation_transformation/edited_text.rs @@ -1,12 +1,11 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation}; +use super::{CursorPosition, Operation, TextWithCursors}; use crate::{ diffs::{myers::diff, raw_operation::RawOperation}, - operation_transformation::{ - merge_context::MergeContext, - utils::{cook_operations::cook_operations, elongate_operations::elongate_operations}, + operation_transformation::utils::{ + cook_operations::cook_operations, elongate_operations::elongate_operations, }, tokenizer::{Tokenizer, word_tokenizer::word_tokenizer}, utils::{side::Side, string_builder::StringBuilder}, @@ -31,7 +30,7 @@ where T: PartialEq + Clone + std::fmt::Debug, { text: &'a str, - operations: Vec>, + operations: Vec>, pub(crate) cursors: Vec, } @@ -77,23 +76,7 @@ where /// Create a new `EditedText` with the given operations. /// The operations must be in the order in which they are meant to be /// applied. The operations must not overlap. - fn new( - text: &'a str, - operations: Vec>, - mut cursors: Vec, - ) -> Self { - operations - .iter() - .zip(operations.iter().skip(1)) - .for_each(|(previous, next)| { - debug_assert!( - previous.operation.start_index() <= next.operation.start_index(), - "{} must not come before {} yet it does", - previous.operation, - next.operation - ); - }); - + fn new(text: &'a str, operations: Vec>, mut cursors: Vec) -> Self { cursors.sort_by_key(|cursor| cursor.char_index); Self { @@ -110,14 +93,11 @@ where "`EditedText`-s must be derived from the same text to be mergable" ); - let mut left_merge_context = MergeContext::default(); - let mut right_merge_context = MergeContext::default(); - let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len()); let mut left_cursors = self.cursors.into_iter().peekable(); let mut right_cursors = other.cursors.into_iter().peekable(); - let mut merged_operations: Vec> = + let mut merged_operations: Vec> = Vec::with_capacity(self.operations.len() + other.operations.len()); let mut left_iter = self.operations.into_iter(); @@ -126,89 +106,107 @@ where let mut maybe_left_op = left_iter.next(); let mut maybe_right_op = right_iter.next(); + let mut seen_left_length: usize = 0; + let mut seen_right_length: usize = 0; + let mut merged_length: usize = 0; + + let mut last_left_op = None; + let mut last_right_op = None; + loop { - let (side, OrderedOperation { operation, order }) = + let (side, operation, mut last_other_op) = match (maybe_left_op.clone(), maybe_right_op.clone()) { (Some(left_op), Some(right_op)) => { - if left_op < right_op { - (Side::Left, left_op) + if left_op + .get_sort_key(seen_left_length) + .partial_cmp(&right_op.get_sort_key(seen_right_length)) + == Some(std::cmp::Ordering::Less) + { + (Side::Left, left_op, last_right_op.clone()) } else { - (Side::Right, right_op) + (Side::Right, right_op, last_left_op.clone()) } } - (Some(left_op), None) => (Side::Left, left_op), - (None, Some(right_op)) => (Side::Right, right_op), + (Some(left_op), None) => (Side::Left, left_op, last_right_op.clone()), + (None, Some(right_op)) => (Side::Right, right_op, last_left_op.clone()), (None, None) => break, }; - if side == Side::Left { - maybe_left_op = left_iter.next(); - } else { - maybe_right_op = right_iter.next(); - } + let is_advancing_operation = matches!( + operation, + Operation::Insert { .. } | Operation::Equal { .. } + ); - let original_start = operation.start_index() as i64; - let original_end = operation.end_index(); let original_length = operation.len() as i64; - let result = match side { - Side::Left => operation.merge_operations_with_context( - &mut right_merge_context, - &mut left_merge_context, - ), - Side::Right => operation.merge_operations_with_context( - &mut left_merge_context, - &mut right_merge_context, - ), + Side::Left => { + let result = operation.merge_operations(&mut last_other_op); + + if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result { + let shift = merged_length as i64 - seen_left_length as i64 + + op.len() as i64 + - original_length; + + while let Some(cursor) = left_cursors.next_if(|cursor| { + cursor.char_index <= seen_left_length + original_length as usize + }) { + merged_cursors.push( + cursor.with_index((cursor.char_index as i64 + shift) as usize), + ); + } + } + + if is_advancing_operation { + seen_left_length += original_length as usize; + } + + maybe_left_op = left_iter.next(); + last_left_op = Some(result.clone()); + + result + } + Side::Right => { + let result = operation.merge_operations(&mut last_other_op); + + if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result { + let shift = merged_length as i64 - seen_right_length as i64 + + op.len() as i64 + - original_length; + + while let Some(cursor) = right_cursors.next_if(|cursor| { + cursor.char_index <= seen_right_length + original_length as usize + }) { + merged_cursors.push( + cursor.with_index((cursor.char_index as i64 + shift) as usize), + ); + } + } + + if is_advancing_operation { + seen_right_length += original_length as usize; + } + + maybe_right_op = right_iter.next(); + last_right_op = Some(result.clone()); + + result + } }; - if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result { - let shift = - op.start_index() as i64 - original_start + op.len() as i64 - original_length; - match side { - Side::Left => { - while let Some(cursor) = - left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) - { - merged_cursors.push(cursor.with_index( - (op.start_index() as i64).max(cursor.char_index as i64 + shift) - as usize, - )); - } - } - Side::Right => { - while let Some(cursor) = - right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) - { - merged_cursors.push(cursor.with_index( - (op.start_index() as i64).max(cursor.char_index as i64 + shift) - as usize, - )); - } - } - } + if result.len() == 0 { + continue; } - merged_operations.extend(result.into_iter().map(|op| OrderedOperation { - order, - operation: op, - })); + if is_advancing_operation { + merged_length += result.len(); + } + + merged_operations.push(result); } - let last_index = merged_operations - .iter() - .filter(|operation| { - matches!( - operation.operation, - Operation::Insert { .. } | Operation::Equal { .. } - ) - }) - .next_back() - .map_or(0, |op| op.operation.end_index()); - for cursor in left_cursors.chain(right_cursors) { - merged_cursors.push(cursor.with_index(last_index)); + merged_cursors.push(cursor.with_index(merged_length)); } Self::new(self.text, merged_operations, merged_cursors) @@ -219,7 +217,7 @@ where pub fn apply(&self) -> String { let mut builder: StringBuilder<'_> = StringBuilder::new(self.text); - for OrderedOperation { operation, .. } in &self.operations { + for operation in &self.operations { builder = operation.apply(builder); } @@ -229,8 +227,6 @@ where #[cfg(test)] mod tests { - use std::env; - use insta::assert_debug_snapshot; use pretty_assertions::assert_eq; diff --git a/src/operation_transformation/merge_context.rs b/src/operation_transformation/merge_context.rs deleted file mode 100644 index 5cf0972..0000000 --- a/src/operation_transformation/merge_context.rs +++ /dev/null @@ -1,73 +0,0 @@ -use core::fmt::Debug; - -use crate::operation_transformation::Operation; - -#[derive(Clone, Debug)] -pub struct MergeContext -where - T: PartialEq + Clone + std::fmt::Debug, -{ - last_operation: Option>, - pub shift: i64, -} - -impl Default for MergeContext -where - T: PartialEq + Clone + std::fmt::Debug, -{ - fn default() -> Self { - MergeContext { - last_operation: None, - shift: 0, - } - } -} - -impl MergeContext -where - T: PartialEq + Clone + std::fmt::Debug, -{ - pub fn last_operation(&self) -> Option<&Operation> { self.last_operation.as_ref() } - - pub fn replace_last_operation(&mut self, operation: Option>) { - self.last_operation = operation; - } - - /// Replace the last delete operation (if there was one) with a new one - /// while applying it to the `shift` in case the last operation - /// was a delete. - pub fn consume_and_replace_last_operation(&mut self, operation: Option>) { - if let Some(Operation::Delete { - deleted_character_count, - .. - }) = self.last_operation.take() - { - self.shift -= deleted_character_count as i64; - } - - self.last_operation = operation; - } - - /// Remove the last operation (if there was one) in case it is behind the - /// threshold operation. This updates the `shift` in case the last operation - /// was a delete. - pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) { - if let Some(last_operation) = self.last_operation.as_ref() { - if let Operation::Delete { - deleted_character_count, - .. - } = last_operation - { - if threshold_index + self.shift > last_operation.end_index() as i64 { - self.shift -= *deleted_character_count as i64; - self.last_operation = None; - } - } else if let Operation::Insert { .. } = last_operation - && threshold_index + self.shift - last_operation.len() as i64 - > last_operation.end_index() as i64 - { - self.last_operation = None; - } - } - } -} diff --git a/src/operation_transformation/operation.rs b/src/operation_transformation/operation.rs index e194f9c..6efaa91 100644 --- a/src/operation_transformation/operation.rs +++ b/src/operation_transformation/operation.rs @@ -1,10 +1,8 @@ use core::fmt::{Debug, Display}; -use std::ops::Range; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use super::merge_context::MergeContext; use crate::{ Token, utils::{ @@ -21,7 +19,7 @@ where T: PartialEq + Clone + std::fmt::Debug, { Equal { - index: usize, + order: usize, length: usize, #[cfg(debug_assertions)] @@ -29,12 +27,12 @@ where }, Insert { - index: usize, + order: usize, text: Vec>, }, Delete { - index: usize, + order: usize, deleted_character_count: usize, #[cfg(debug_assertions)] @@ -49,74 +47,83 @@ where /// Creates an equal operation with the given index. /// This operation is used to indicate that the text at the given index /// is unchanged. - pub fn create_equal(index: usize, length: usize) -> Option { - if length == 0 { - return None; - } - - Some(Operation::Equal { - index, + pub fn create_equal(order: usize, length: usize) -> Self { + Operation::Equal { + order, length, #[cfg(debug_assertions)] text: None, - }) + } } - pub fn create_equal_with_text(index: usize, text: String) -> Option { - if text.is_empty() { - return None; - } - - Some(Operation::Equal { - index, + pub fn create_equal_with_text(order: usize, text: String) -> Self { + Operation::Equal { + order, length: text.chars().count(), #[cfg(debug_assertions)] text: Some(text), - }) + } } /// Creates an insert operation with the given index and text. - /// If the text is empty (meaning that the operation would be a no-op), - /// returns None. - pub fn create_insert(index: usize, text: Vec>) -> Option { - if text.is_empty() { - return None; - } - - Some(Operation::Insert { index, text }) + pub fn create_insert(order: usize, text: Vec>) -> Self { + Operation::Insert { order, text } } /// Creates a delete operation with the given index and number of - /// to-be-deleted characters. If the operation would delete 0 (meaning - /// that the operation would be a no-op), returns None. - pub fn create_delete(index: usize, deleted_character_count: usize) -> Option { - if deleted_character_count == 0 { - return None; - } - - Some(Operation::Delete { - index, + /// to-be-deleted characters. + pub fn create_delete(order: usize, deleted_character_count: usize) -> Self { + Operation::Delete { + order, deleted_character_count, #[cfg(debug_assertions)] deleted_text: None, - }) + } } - pub fn create_delete_with_text(index: usize, text: String) -> Option { - if text.is_empty() { - return None; - } - - Some(Operation::Delete { - index, + pub fn create_delete_with_text(order: usize, text: String) -> Self { + Operation::Delete { + order, deleted_character_count: text.chars().count(), #[cfg(debug_assertions)] deleted_text: Some(text), - }) + } + } + + fn order(&self) -> usize { + match self { + Operation::Equal { order, .. } + | Operation::Insert { order, .. } + | Operation::Delete { order, .. } => *order, + } + } + + pub fn get_sort_key(&self, insertion_index: usize) -> (usize, usize, usize, String) { + ( + self.order(), + match self { + Operation::Delete { .. } => 1, + Operation::Insert { .. } => 2, + Operation::Equal { .. } => 3, + }, + insertion_index, + // Make sure that the ordering is deterministic regardless of which text + // is left or right. + match self { + Operation::Equal { length, .. } => length.to_string(), + Operation::Insert { text, .. } => { + text.iter().map(Token::original).collect::() + } + Operation::Delete { + deleted_character_count, + .. + } => deleted_character_count.to_string(), + }, + ) } /// Applies the operation to the given `StringBuilder`, returning the @@ -129,63 +136,47 @@ where Operation::Equal { #[cfg(debug_assertions)] text, + length, .. } => { #[cfg(debug_assertions)] debug_assert!( text.as_ref() - .is_none_or(|text| builder.get_slice(self.range()) == *text), - "Text which is supposed to be equal does not match the text in the range" + .is_none_or(|text| builder.get_slice_from_remaining(self.len()) == *text), + "Text (`{}`) which is supposed to be equal does not match the text in the \ + range: `{}`", + text.as_ref().unwrap_or(&String::new()), + builder.get_slice_from_remaining(self.len()) ); - return builder; + builder.retain(*length); + } + Operation::Insert { text, .. } => { + builder.insert(&text.iter().map(Token::original).collect::()); } - Operation::Insert { text, .. } => builder.insert( - self.start_index(), - &text.iter().map(Token::original).collect::(), - ), Operation::Delete { #[cfg(debug_assertions)] deleted_text, + deleted_character_count, .. } => { #[cfg(debug_assertions)] debug_assert!( deleted_text .as_ref() - .is_none_or(|text| builder.get_slice(self.range()) == *text), - "Text to delete does not match the text in the range" + .is_none_or(|text| builder.get_slice_from_remaining(self.len()) == *text), + "Text to-be-deleted `{}` does not match the text in the range: `{}`", + deleted_text.as_ref().unwrap_or(&String::new()), + builder.get_slice_from_remaining(self.len()) ); - builder.delete(self.range()); + builder.delete(*deleted_character_count); } } builder } - /// Returns the index of the first character that the operation affects. - pub fn start_index(&self) -> usize { - match self { - Operation::Equal { index, .. } - | Operation::Insert { index, .. } - | Operation::Delete { index, .. } => *index, - } - } - - /// Returns the index of the last character that the operation affects. - pub fn end_index(&self) -> usize { - debug_assert!( - self.len() > 0, - " len() must be greater than 0 because operations must be non-empty" - ); - self.start_index() + self.len() - 1 - } - - /// Returns the range of indices of characters that the operation affects. - #[allow(clippy::range_plus_one)] - pub fn range(&self) -> Range { self.start_index()..self.end_index() + 1 } - /// Returns the number of affected characters. It is always greater than 0 /// because empty operations cannot be created. pub fn len(&self) -> usize { @@ -199,76 +190,17 @@ where } } - /// Creates a new operation with the same type and text but with the given - /// index. - pub fn with_index(self, index: usize) -> Self { - match self { - Operation::Equal { - length, - - #[cfg(debug_assertions)] - text, - .. - } => Operation::Equal { - index, - length, - - #[cfg(debug_assertions)] - text, - }, - Operation::Insert { text, .. } => Operation::Insert { index, text }, - Operation::Delete { - deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text, - .. - } => Operation::Delete { - index, - deleted_character_count, - - #[cfg(debug_assertions)] - deleted_text, - }, - } - } - - /// Creates a new operation with the same type and text but with the index - /// shifted by the given offset. The offset can be negative but the - /// resulting index must be non-negative. - /// - /// # Panics - /// - /// In debug mode, panics if the resulting index is negative. - pub fn with_shifted_index(self, offset: i64) -> Self { - let index = self.start_index() as i64 + offset; - debug_assert!(index >= 0, "Shifted index must be non-negative"); - - self.with_index(index as usize) - } - /// Merges the operation with the given context, producing a new operation /// and updating the context. This implements a comples FSM that handles /// the merging of operations in a way that is consistent with the text. /// The contexts are updated in-place. #[allow(clippy::too_many_lines)] - pub fn merge_operations_with_context( - self, - affecting_context: &mut MergeContext, - produced_context: &mut MergeContext, - ) -> Option> { - affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64); - let operation = self.with_shifted_index(affecting_context.shift); - - match (operation, affecting_context.last_operation()) { - (operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => { - produced_context.shift += operation.len() as i64; - produced_context.consume_and_replace_last_operation(Some(operation.clone())); - Some(operation) - } + pub fn merge_operations(self, previous_operation: &mut Option) -> Operation { + let operation = self; + match (operation, previous_operation) { ( - Operation::Insert { text, index }, + Operation::Insert { order, text }, Some(Operation::Insert { text: previous_inserted_text, .. @@ -279,129 +211,113 @@ where // This way, we don't end up duplicating text. let offset_in_tokens = find_longest_prefix_contained_within(previous_inserted_text, &text); - let offset_in_length = text - .iter() - .take(offset_in_tokens) - .map(Token::get_original_length) - .sum::(); - let trimmed_operation = - Operation::create_insert(index, text[offset_in_tokens..].to_vec()); - affecting_context.shift -= offset_in_length as i64; - produced_context.shift += trimmed_operation - .as_ref() - .map(Operation::len) - .unwrap_or_default() as i64; - produced_context.consume_and_replace_last_operation(trimmed_operation.clone()); - - trimmed_operation + Operation::create_insert(order, text[offset_in_tokens..].to_vec()) } ( - operation @ Operation::Delete { .. }, - None | Some(Operation::Insert { .. } | Operation::Equal { .. }), - ) => { - produced_context.consume_and_replace_last_operation(Some(operation.clone())); - Some(operation) - } + Operation::Delete { + order, + deleted_character_count, - ( - operation @ Operation::Insert { .. }, - Some(last_delete @ Operation::Delete { .. }), + #[cfg(debug_assertions)] + deleted_text, + }, + Some(Operation::Delete { + order: last_delete_order, + deleted_character_count: last_delete_deleted_character_count, + .. + }), ) => { - produced_context.shift += operation.len() as i64; + let operation_end_index = order + deleted_character_count; + let last_delete_end_index = + *last_delete_order + *last_delete_deleted_character_count; - debug_assert!( - last_delete.range().contains(&operation.start_index()), - "There is a last delete ({last_delete}) but the operation ({operation}) is \ - not contained in it" + let new_length = deleted_character_count + .min(0.max(operation_end_index as i64 - last_delete_end_index as i64) as usize); + + let overlap = deleted_character_count - new_length; + + #[cfg(debug_assertions)] + let updated_delete = deleted_text.as_ref().map_or_else( + || Operation::create_delete(order + overlap, new_length), + |text| { + Operation::create_delete_with_text( + order + overlap, + text.chars() + .skip(deleted_character_count - new_length) + .collect::(), + ) + }, ); - let difference = operation.start_index() as i64 - last_delete.start_index() as i64; - - let moved_operation = operation.with_index(last_delete.start_index()); - - affecting_context.replace_last_operation(Operation::create_delete( - moved_operation.end_index() + 1, - (last_delete.len() as i64 - difference) as usize, - )); - affecting_context.shift -= difference; - - produced_context.consume_and_replace_last_operation(Some(moved_operation.clone())); - - Some(moved_operation) - } - - ( - operation @ Operation::Delete { .. }, - Some(last_delete @ Operation::Delete { .. }), - ) => { - debug_assert!( - last_delete.range().contains(&operation.start_index()), - "There is a last delete ({last_delete}) but the operation ({operation}) is \ - not contained in it" - ); - - let difference = operation.start_index() as i64 - last_delete.start_index() as i64; - - let updated_delete = Operation::create_delete( - last_delete.start_index(), - 0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize, - ); - - affecting_context.replace_last_operation(Operation::create_delete( - last_delete.start_index(), - 0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize, - )); - affecting_context.shift -= difference; - - produced_context.consume_and_replace_last_operation(updated_delete.clone()); + #[cfg(not(debug_assertions))] + let updated_delete = Operation::create_delete(order + overlap, new_length); updated_delete } + ( - ref operation @ Operation::Equal { + Operation::Equal { + order, length, + #[cfg(debug_assertions)] ref text, - .. }, - Some(last_delete @ Operation::Delete { .. }), + Some(Operation::Delete { + order: last_delete_order, + deleted_character_count: last_delete_deleted_character_count, + .. + }), ) => { - debug_assert!( - last_delete.range().contains(&operation.start_index()), - "There is a last delete ({last_delete}) but the operation ({operation}) is \ - not contained in it" - ); + let last_delete_end_index = + *last_delete_order + *last_delete_deleted_character_count; - let overlap = (length as i64) - .min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1); + let overlap = + 0.max((length as i64).min(last_delete_end_index as i64 - order as i64)); #[cfg(debug_assertions)] - let result = text.as_ref().map_or_else( + let updated_equal = text.as_ref().map_or_else( || { Operation::create_equal( - operation.end_index().min(last_delete.end_index()), + order + overlap as usize, (length as i64 - overlap) as usize, ) }, |text| { Operation::create_equal_with_text( - operation.end_index().min(last_delete.end_index()), + order + overlap as usize, text.chars().skip(overlap as usize).collect::(), ) }, ); #[cfg(not(debug_assertions))] - let result = Operation::create_equal( - operation.end_index().min(last_delete.end_index()), + let updated_equal = Operation::create_equal( + order + overlap as usize, (length as i64 - overlap) as usize, ); - result + updated_equal } - (operation @ Operation::Equal { .. }, _) => Some(operation), + + ( + ref operation @ Operation::Equal { ref order, .. }, + Some(Operation::Equal { + order: last_equal_order, + length: last_equal_length, + .. + }), + ) => { + if operation.len() == *last_equal_length && *order == *last_equal_order { + Operation::create_equal(*order, 0) + } else { + operation.clone() + } + } + + (operation, _) => operation, } } } @@ -413,7 +329,7 @@ where fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Operation::Equal { - index, + order, length, #[cfg(debug_assertions)] @@ -422,28 +338,29 @@ where #[cfg(debug_assertions)] write!( f, - "", + "", text.as_ref() - .map(|text| format!("'{text}'")) + .map(|text| format!("'{}'", text.replace('\n', "\\n"))) .unwrap_or(format!("{length} characters")), - index )?; #[cfg(not(debug_assertions))] - write!(f, "")?; + write!(f, "")?; Ok(()) } - Operation::Insert { index, text } => { + Operation::Insert { order, text } => { write!( f, - "", - text.iter().map(Token::original).collect::(), - index + "", + text.iter() + .map(Token::original) + .collect::() + .replace('\n', "\\n"), ) } Operation::Delete { - index, + order, deleted_character_count, #[cfg(debug_assertions)] @@ -452,18 +369,17 @@ where #[cfg(debug_assertions)] write!( f, - "", + "", deleted_text .as_ref() - .map(|text| format!("'{text}'")) + .map(|text| format!("'{}'", text.replace('\n', "\\n"))) .unwrap_or(format!("{deleted_character_count} characters")), - index )?; #[cfg(not(debug_assertions))] write!( f, - "", + "", )?; Ok(()) @@ -485,29 +401,28 @@ mod tests { use super::*; - #[test] - #[should_panic(expected = "Shifted index must be non-negative")] - fn test_shifting_error() { - insta::assert_debug_snapshot!( - Operation::create_insert(1, vec!["hi".into()]) - .unwrap() - .with_shifted_index(-2) - ); - } - #[test] fn test_apply_delete_with_create() { let builder = StringBuilder::new("hello world"); - let operation = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap(); + let delete_operation = Operation::<()>::create_delete_with_text(0, "hello ".to_owned()); + let retain_operation = Operation::<()>::create_equal(6, 5); - assert_eq!(operation.apply(builder).build(), "hello"); + let mut builder = delete_operation.apply(builder); + builder = retain_operation.apply(builder); + + assert_eq!(builder.build(), "world"); } #[test] fn test_apply_insert() { let builder = StringBuilder::new("hello"); - let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap(); - assert_eq!(operation.apply(builder).build(), "hello my friend"); + let retain_operation = Operation::<()>::create_equal(0, 5); + let insert_operation = Operation::create_insert(5, vec![" my friend".into()]); + + let mut builder = retain_operation.apply(builder); + builder = insert_operation.apply(builder); + + assert_eq!(builder.build(), "hello my friend"); } } diff --git a/src/operation_transformation/ordered_operation.rs b/src/operation_transformation/ordered_operation.rs deleted file mode 100644 index 6a668e2..0000000 --- a/src/operation_transformation/ordered_operation.rs +++ /dev/null @@ -1,48 +0,0 @@ -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; - -use crate::operation_transformation::Operation; - -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[derive(Debug, Clone, PartialEq)] -pub struct OrderedOperation -where - T: PartialEq + Clone + std::fmt::Debug, -{ - pub order: usize, - pub operation: Operation, -} - -impl OrderedOperation -where - T: PartialEq + Clone + std::fmt::Debug, -{ - pub fn get_sort_key(&self) -> (usize, usize, String) { - ( - self.order, - self.operation.start_index(), - // Make sure that the ordering is deterministic regardless of which text - // is left or right. - match &self.operation { - Operation::Equal { index, .. } => index.to_string(), - Operation::Insert { text, .. } => text - .iter() - .map(crate::tokenizer::token::Token::original) - .collect::(), - Operation::Delete { - deleted_character_count, - .. - } => deleted_character_count.to_string(), - }, - ) - } -} - -impl PartialOrd for OrderedOperation -where - T: PartialEq + Clone + std::fmt::Debug, -{ - fn partial_cmp(&self, other: &Self) -> Option { - self.get_sort_key().partial_cmp(&other.get_sort_key()) - } -} diff --git a/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index 246b2fe..abbabbd 100644 --- a/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -1,43 +1,19 @@ --- -source: reconcile/src/operation_transformation/edited_text.rs +source: src/operation_transformation/edited_text.rs expression: operations snapshot_kind: text --- EditedText { text: "hello world! How are you? Adam", operations: [ - OrderedOperation { - order: 0, - operation: , - }, - OrderedOperation { - order: 0, - operation: , - }, - OrderedOperation { - order: 12, - operation: , - }, - OrderedOperation { - order: 13, - operation: , - }, - OrderedOperation { - order: 16, - operation: , - }, - OrderedOperation { - order: 17, - operation: , - }, - OrderedOperation { - order: 20, - operation: , - }, - OrderedOperation { - order: 20, - operation: , - }, + , + , + , + , + , + , + , + , ], cursors: [], } diff --git a/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap b/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap index 33414f8..275a552 100644 --- a/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap +++ b/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap @@ -1,23 +1,14 @@ --- -source: reconcile/src/operation_transformation/edited_text.rs +source: src/operation_transformation/edited_text.rs expression: operations snapshot_kind: text --- EditedText { text: "hello world!", operations: [ - OrderedOperation { - order: 0, - operation: , - }, - OrderedOperation { - order: 5, - operation: , - }, - OrderedOperation { - order: 6, - operation: , - }, + , + , + , ], cursors: [], } diff --git a/src/operation_transformation/utils/cook_operations.rs b/src/operation_transformation/utils/cook_operations.rs index 4405689..7d5f85e 100644 --- a/src/operation_transformation/utils/cook_operations.rs +++ b/src/operation_transformation/utils/cook_operations.rs @@ -1,52 +1,44 @@ -use crate::{ - diffs::raw_operation::RawOperation, - operation_transformation::{Operation, ordered_operation::OrderedOperation}, -}; +use crate::{diffs::raw_operation::RawOperation, operation_transformation::Operation}; -/// Turn raw operations into ordered operations while keeping track of old & new -/// indexes. -pub fn cook_operations(raw_operations: I) -> impl Iterator> +/// Turn raw operations into ordered operations while keeping track of the +/// original token's indexes. +pub fn cook_operations(raw_operations: I) -> impl Iterator> where I: IntoIterator>, T: PartialEq + Clone + std::fmt::Debug, { - let mut new_index = 0; // this is the start index of the operation on the new text - let mut order = 0; // this is the start index of the operation on the original text + let mut original_text_index = 0; // this is the start index of the operation on the original text - raw_operations.into_iter().filter_map(move |raw_operation| { + raw_operations.into_iter().map(move |raw_operation| { let length = raw_operation.original_text_length(); match raw_operation { RawOperation::Equal(..) => { let op = if cfg!(debug_assertions) { - Operation::create_equal_with_text(new_index, raw_operation.get_original_text()) + Operation::create_equal_with_text( + original_text_index, + raw_operation.get_original_text(), + ) } else { - Operation::create_equal(new_index, length) - } - .map(|operation| OrderedOperation { order, operation }); + Operation::create_equal(original_text_index, length) + }; - new_index += length; - order += length; - - op - } - RawOperation::Insert(tokens) => { - let op = Operation::create_insert(new_index, tokens) - .map(|operation| OrderedOperation { order, operation }); - - new_index += length; + original_text_index += length; op } + RawOperation::Insert(tokens) => Operation::create_insert(original_text_index, tokens), RawOperation::Delete(..) => { let op = if cfg!(debug_assertions) { - Operation::create_delete_with_text(new_index, raw_operation.get_original_text()) + Operation::create_delete_with_text( + original_text_index, + raw_operation.get_original_text(), + ) } else { - Operation::create_delete(new_index, length) - } - .map(|operation| OrderedOperation { order, operation }); + Operation::create_delete(original_text_index, length) + }; - order += length; + original_text_index += length; op } diff --git a/src/operation_transformation/utils/elongate_operations.rs b/src/operation_transformation/utils/elongate_operations.rs index 9883791..1a218c6 100644 --- a/src/operation_transformation/utils/elongate_operations.rs +++ b/src/operation_transformation/utils/elongate_operations.rs @@ -17,6 +17,8 @@ where let mut maybe_previous_insert: Option> = None; let mut maybe_previous_delete: Option> = None; + // We don't elongate `equals` as they're needed to maintain cursor positions + // when merging against deletes. let mut result: Vec> = raw_operations .into_iter() .flat_map(|next| match next { @@ -41,87 +43,80 @@ where } }, RawOperation::Equal(..) => Box::new( - maybe_previous_insert + maybe_previous_delete .take() .into_iter() - .chain(maybe_previous_delete.take()) + .chain(maybe_previous_insert.take()) .chain(iter::once(next)), ) as Box>>, }) .collect(); - if let Some(prev) = maybe_previous_insert { + if let Some(prev) = maybe_previous_delete { result.push(prev); } - if let Some(prev) = maybe_previous_delete { + if let Some(prev) = maybe_previous_insert { result.push(prev); } result } -// #[cfg(test)] -// mod tests { +#[cfg(test)] +mod tests { + use super::*; + use crate::tokenizer::token::Token; -// use super::*; + // Helper constructors for cleaner tests + fn ins(texts: &[&str]) -> RawOperation { + RawOperation::Insert(texts.iter().map(|t| Token::from(*t)).collect()) + } -// #[test] -// fn test_elongate_operations_empty() { -// let operations: Vec> = vec![]; -// let result = elongate_operations(operations); -// assert_eq!(result, vec![]); -// } + fn del(texts: &[&str]) -> RawOperation { + RawOperation::Delete(texts.iter().map(|t| Token::from(*t)).collect()) + } -// #[test] -// fn test_elongate_operations_single_operation() { -// let operations = vec![RawOperation::Insert(vec!["test".into()])]; -// let result = elongate_operations(operations); -// assert_eq!(result.len(), 1); -// assert!(matches!(result[0], RawOperation::Insert(_))); -// } + fn ins_custom(text: &str, lj: bool, rj: bool) -> RawOperation { + RawOperation::Insert(vec![Token::new(text.to_string(), text.to_string(), lj, rj)]) + } -// #[test] -// fn test_elongate_operations_interleaved() { -// let operations = vec![ -// RawOperation::Insert(vec!["a".into()]), -// RawOperation::Delete(vec!["b".into()]), -// RawOperation::Insert(vec!["c".into()]), -// RawOperation::Delete(vec!["d".into()]), -// ]; -// let result = elongate_operations(operations); -// assert_eq!(result.len(), 2); -// assert!(matches!(result[0], RawOperation::Insert(_))); -// assert!(matches!(result[1], RawOperation::Delete(_))); -// } + #[test] + fn merges_adjacent_joinable_inserts() { + let ops = vec![ins(&["a"]), ins(&["b"]), ins(&["c"])]; + let result = elongate_operations(ops); + assert_eq!(result.len(), 1); + match &result[0] { + RawOperation::Insert(tokens) => { + let originals: String = tokens.iter().map(|t| t.original()).collect(); + assert_eq!(originals, "abc"); + } + _ => panic!("Expected single Insert operation"), + } + } -// #[test] -// fn test_elongate_operations_with_equal() { -// let operations = vec![ -// RawOperation::Equal(vec!["a".into()]), -// RawOperation::Equal(vec!["b".into()]), -// RawOperation::Insert(vec!["c".into()]), -// RawOperation::Insert(vec!["d".into()]), -// ]; -// let result = elongate_operations(operations); -// assert_eq!(result.len(), 2); -// assert!(matches!(result[0], RawOperation::Equal(_))); -// assert!(matches!(result[1], RawOperation::Insert(_))); -// } + #[test] + fn does_not_merge_when_not_joinable() { + let ops = vec![ + ins_custom("a", true, false), // not right-joinable + ins_custom("b", true, true), // left-joinable but previous isn't right-joinable + ]; + let result = elongate_operations(ops); + assert_eq!( + result.len(), + 2, + "Operations should remain separate when not joinable" + ); + } -// #[test] -// fn test_elongate_operations_mixed_sequence() { -// let operations = vec![ -// RawOperation::Insert(vec!["a".into()]), -// RawOperation::Equal(vec!["b".into()]), -// RawOperation::Delete(vec!["c".into()]), -// RawOperation::Equal(vec!["d".into()]), -// ]; -// let result = elongate_operations(operations); -// assert_eq!(result.len(), 4); -// assert!(matches!(result[0], RawOperation::Insert(_))); -// assert!(matches!(result[1], RawOperation::Equal(_))); -// assert!(matches!(result[2], RawOperation::Delete(_))); -// assert!(matches!(result[3], RawOperation::Equal(_))); -// } -// } + #[test] + fn merges_interleaved_insert_delete_sequences() { + // Pattern IDID -> II DD + let ops = vec![ins(&["i1"]), del(&["d1"]), ins(&["i2"]), del(&["d2"])]; + let result = elongate_operations(ops); + + assert_eq!(result.len(), 2); + assert!(matches!(result[0], RawOperation::Delete(_))); + assert!(matches!(result[1], RawOperation::Insert(_))); + } +} diff --git a/src/utils/find_longest_prefix_contained_within.rs b/src/utils/find_longest_prefix_contained_within.rs index eb4b826..a04da4e 100644 --- a/src/utils/find_longest_prefix_contained_within.rs +++ b/src/utils/find_longest_prefix_contained_within.rs @@ -9,20 +9,20 @@ use crate::Token; /// old: [0, 1, 9, 0, 2, 5] /// new: [9, 0, 2, 5, 1] /// ``` -/// > results in an length of 4 +/// > results in a length of 4 /// /// /// ```not_rust /// old: [0, 1, 9, 0, 2, 5] /// new: [0, 2] /// ``` -/// > results in an length of 2 +/// > results in a length of 2 /// /// ```not_rust /// old: [0, 1, 9, 0, 2, 5] /// new: [0, 4] /// ``` -/// > results in an length of 1 +/// > results in a length of 1 pub fn find_longest_prefix_contained_within(old: &[Token], new: &[Token]) -> usize where T: PartialEq + Clone + std::fmt::Debug, diff --git a/src/utils/string_builder.rs b/src/utils/string_builder.rs index b19bcbb..af24d82 100644 --- a/src/utils/string_builder.rs +++ b/src/utils/string_builder.rs @@ -1,83 +1,67 @@ -use core::ops::Range; +use std::iter::Iterator; -/// A helper for building a string in order based on an original string and a -/// series of insertions and deletions applied to it. It is safe to use with -/// UTF-8 strings as all operations are based on character indices. -#[derive(Debug, Clone)] +/// A helper for building a string in-order based on an original string and a +/// series of insertions, deletions, and copies applied to it. It is safe to use +/// with UTF-8 strings as all operations are based on character indices. The +/// methods must be called in-order. pub struct StringBuilder<'a> { - original: &'a str, - last_old_char_index: usize, + original: Box + 'a>, buffer: String, + + #[cfg(debug_assertions)] + remaining: String, } impl StringBuilder<'_> { pub fn new(original: &str) -> StringBuilder<'_> { StringBuilder { - original, - last_old_char_index: 0, + original: Box::new(original.chars()), buffer: String::with_capacity(original.len()), + + #[cfg(debug_assertions)] + remaining: original.to_owned(), } } - /// Insert a string at the given index after copying the original string up - /// to that index from the last insertion or deletion. - pub fn insert(&mut self, from: usize, text: &str) { - self.copy_until(from); - self.buffer.push_str(text); + /// Insert a string at the end of the built buffer. + pub fn insert(&mut self, text: &str) { self.buffer.push_str(text); } + + /// Skip copying `length` characters from the original string to the built + /// buffer. + pub fn delete(&mut self, length: usize) { + if length == 0 { + return; + } + + self.original.nth(length - 1); + + if cfg!(debug_assertions) { + self.remaining = self.remaining.chars().skip(length).collect(); + } } - /// Delete a string at the given index after copying the original string up - /// to that index from the last insertion or deletion. - pub fn delete(&mut self, range: core::ops::Range) { - self.copy_until(range.start); - self.last_old_char_index += range.len(); - } + /// Copy `length` characters from the original string to the built buffer. + pub fn retain(&mut self, length: usize) { + self.buffer.extend(self.original.by_ref().take(length)); - fn copy_until(&mut self, index: usize) { - let current_char_count = self.buffer.chars().count(); - debug_assert!( - index >= current_char_count, - "String builder only support building in order" - ); - - let jump = index - current_char_count; - - self.buffer.push_str( - &self - .original - .chars() - .skip(self.last_old_char_index) - .take(jump) - .collect::(), - ); - self.last_old_char_index += jump; + if cfg!(debug_assertions) { + self.remaining = self.remaining.chars().skip(length).collect(); + } } /// Finish building the string after copying the remaining original string /// since the last insertion or deletion. - pub fn build(mut self) -> String { - self.buffer.push_str( - &self - .original - .chars() - .skip(self.last_old_char_index) - .collect::(), - ); + pub fn build(self) -> String { self.buffer } - self.buffer - } + #[cfg(debug_assertions)] + /// Get a slice of the remaining original string. The slice starts from + /// where the next delete/retain operation would start and is of length + /// `length`. The implementation is quite suboptimal but it's only used + /// for debugging. + pub fn get_slice_from_remaining(&self, length: usize) -> String { + let result = self.remaining.chars().take(length).collect::(); - #[allow(dead_code)] - pub fn get_slice(&self, range: Range) -> String { - let result = self - .buffer - .chars() - .chain(self.original.chars().skip(self.last_old_char_index)) - .skip(range.start) - .take(range.end - range.start) - .collect::(); - - debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",); + debug_assert_eq!(result.chars().count(), length, "Range out of bounds"); result } @@ -85,6 +69,8 @@ impl StringBuilder<'_> { #[cfg(test)] mod tests { + use pretty_assertions::assert_eq; + use super::*; #[test] @@ -92,20 +78,74 @@ mod tests { let original = "aaa bbb ccc"; let mut builder = StringBuilder::new(original); - builder.insert(0, "ddd "); - builder.delete(4..8); - builder.insert(11, " eee"); + builder.insert("ddd"); + builder.delete(3); + builder.retain(8); + builder.insert(" eee"); assert_eq!(builder.build(), "ddd bbb ccc eee"); - } - #[test] - fn test_string_builder2() { let original = "abcde"; let mut builder = StringBuilder::new(original); - builder.delete(1..4); + builder.retain(1); + builder.delete(3); + builder.retain(1); assert_eq!(builder.build(), "ae"); } + + #[test] + fn test_empty_original() { + let original = ""; + let mut builder = StringBuilder::new(original); + + builder.insert("test"); + assert_eq!(builder.build(), "test"); + } + + #[test] + fn test_unicode_characters() { + let original = "こんにちは"; + let mut builder = StringBuilder::new(original); + + builder.retain(3); + builder.insert("世界, "); // Insert "World, " + builder.retain(2); + + assert_eq!(builder.build(), "こんに世界, ちは"); + } + + #[test] + fn test_get_slice() { + let original = "abcdef"; + let builder = StringBuilder::new(original); + + // Test getting a slice of the original string + assert_eq!(builder.get_slice_from_remaining(3), "abc"); + + // Test getting a slice that includes both buffer and remaining original + let mut builder = StringBuilder::new(original); + builder.retain(2); // "ab" in buffer + assert_eq!(builder.get_slice_from_remaining(2), "cd"); + } + + #[test] + fn test_retain_all() { + let original = "Hello, world!"; + let mut builder = StringBuilder::new(original); + + builder.retain(original.len()); + assert_eq!(builder.build(), original); + } + + #[test] + fn test_delete_all() { + let original = "Hello"; + let mut builder = StringBuilder::new(original); + + builder.delete(original.len()); + builder.insert("Hi"); + assert_eq!(builder.build(), "Hi"); + } } diff --git a/tests/examples/deletes.yml b/tests/examples/deletes.yml index c4c145b..3476e95 100644 --- a/tests/examples/deletes.yml +++ b/tests/examples/deletes.yml @@ -24,6 +24,12 @@ left: long small right: long with big and small expected: long small +--- +parent: long run of text where one barely has no changes but has cursors +left: long| run of tex|t where one barely has no |changes but has |cursors +right: long run one barely has no changes cursors +expected: long| ru|n one barely has no |changes |cursors + --- parent: long text where the cursor has to be clamped after delete left: long text where the cursor has to be clamped after delete| diff --git a/tests/examples/replacing.yml b/tests/examples/replacing.yml index cea57b8..15126d8 100644 --- a/tests/examples/replacing.yml +++ b/tests/examples/replacing.yml @@ -2,7 +2,7 @@ parent: original_1 original_2 original_3 left: original_1 edit_1| original_3 right: original_1 original_2| edit_2 -expected: original_1 edit_1|| edit_2 +expected: original_1| edit_1| edit_2 --- # Both replace the same token with the same value diff --git a/tests/examples/various.yml b/tests/examples/various.yml index cfbeb42..4576edc 100644 --- a/tests/examples/various.yml +++ b/tests/examples/various.yml @@ -43,7 +43,7 @@ expected: Send the |detailed |quarterly |detailed report to the |entire |team parent: Ready, Set go left: Ready! Set go| right: Ready, Set, go!| -expected: Ready! Set, go!|| +expected: Ready!| Set, go!| --- parent: "Total: $100" @@ -67,7 +67,7 @@ expected: market| placemarket|space parent: A B C D left: A X B D| right: A B Y| -expected: A X B |Y| +expected: A X B| Y| --- parent: Please submit your assignment by Friday