From 9da05c6ff6bf3d70a017ca1df326e2632f652d44 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 6 Apr 2025 11:39:30 +0100 Subject: [PATCH] Fix cursor moving --- .../reconcile/src/operation_transformation.rs | 4 +- .../src/operation_transformation/cursor.rs | 13 +- .../operation_transformation/edited_text.rs | 142 +++++++++--------- .../src/operation_transformation/operation.rs | 135 ++++++++++++++++- ...ted_text__tests__calculate_operations.snap | 16 ++ ...ts__calculate_operations_with_no_diff.snap | 23 +++ .../reconcile/src/tokenizer/word_tokenizer.rs | 30 ++-- 7 files changed, 269 insertions(+), 94 deletions(-) create mode 100644 backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap diff --git a/backend/reconcile/src/operation_transformation.rs b/backend/reconcile/src/operation_transformation.rs index 36239df..45575b9 100644 --- a/backend/reconcile/src/operation_transformation.rs +++ b/backend/reconcile/src/operation_transformation.rs @@ -110,8 +110,8 @@ mod test { }, // inside of "s|ample" because "text" got replaced by "sample" CursorPosition { id: 3, - char_index: 31 - }, // before "for" + char_index: 43 + }, // before "cursor movements" ] ) ); diff --git a/backend/reconcile/src/operation_transformation/cursor.rs b/backend/reconcile/src/operation_transformation/cursor.rs index c17f560..a8e74da 100644 --- a/backend/reconcile/src/operation_transformation/cursor.rs +++ b/backend/reconcile/src/operation_transformation/cursor.rs @@ -16,19 +16,10 @@ pub struct CursorPosition { } impl CursorPosition { - #[must_use] - pub fn apply_merge_context(&self, context: &MergeContext) -> Self - where - T: PartialEq + Clone + std::fmt::Debug, - { - let char_index = match context.last_operation() { - Some(Operation::Delete { index, .. }) => (*index) as i64, - _ => self.char_index as i64 + context.shift, - }; - + pub fn with_index(self, index: usize) -> Self { CursorPosition { id: self.id, - char_index: char_index.max(0) as usize, + char_index: index, } } } diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index 12e0ee8..f195a9f 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -3,11 +3,11 @@ use core::iter; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation}; +use super::{ordered_operation::OrderedOperation, CursorPosition, Operation, TextWithCursors}; use crate::{ diffs::{myers::diff, raw_operation::RawOperation}, - operation_transformation::merge_context::MergeContext, - tokenizer::{Tokenizer, word_tokenizer::word_tokenizer}, + operation_transformation::{merge_context::MergeContext, operation}, + tokenizer::{word_tokenizer::word_tokenizer, Tokenizer}, utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder}, }; @@ -72,6 +72,10 @@ where where I: IntoIterator>, { + // This might look bad, but this makes sense. The inserts and deltes can be + // interleaved, such as: IDIDID and we need to turn this into IIIDDD. + // So we need to keep track of both the last insert and delete operations, not + // just the last one. let mut maybe_previous_insert: Option> = None; let mut maybe_previous_delete: Option> = None; @@ -132,12 +136,22 @@ where raw_operations.into_iter().flat_map(move |raw_operation| { let length = raw_operation.original_text_length(); - let operation = match raw_operation { + match raw_operation { RawOperation::Equal(..) => { + let op = if cfg!(debug_assertions) { + Operation::create_equal_with_text( + new_index, + raw_operation.get_original_text(), + ) + } else { + Operation::create_equal(new_index, length) + } + .map(|operation| OrderedOperation { order, operation }); + new_index += length; order += length; - None + op } RawOperation::Insert(tokens) => { let op = Operation::create_insert(new_index, tokens) @@ -162,9 +176,7 @@ where op } - }; - - operation.into_iter() + } }) } @@ -208,10 +220,10 @@ where let mut right_merge_context = MergeContext::default(); let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len()); - let mut left_cursors = self.cursors.iter().peekable(); - let mut right_cursors = other.cursors.iter().peekable(); + let mut left_cursors = self.cursors.into_iter().peekable(); + let mut right_cursors = other.cursors.into_iter().peekable(); - let merged_operations = self + let merged_operations: Vec> = self .operations .into_iter() // The current text is always the left; the other operation is the right side. @@ -221,13 +233,11 @@ where |(operation, _)| { ( operation.order, - // Operations on the left and right must come in the same order so that - // inserts can be merged with other inserts and deletes with deletes. - usize::from(matches!(operation.operation, Operation::Delete { .. })), operation.operation.start_index(), // Make sure that the ordering is deterministic regardless which text // is left or right. match &operation.operation { + Operation::Equal { index, .. } => index.to_string(), Operation::Insert { text, .. } => text .iter() .map(super::super::tokenizer::token::Token::original) @@ -241,58 +251,55 @@ where }, ) .flat_map(|(OrderedOperation { order, operation }, side)| { + let original_start = operation.start_index() as i64; + let original_end = operation.end_index(); + match side { Side::Left => { - while let Some(cursor) = left_cursors - .next_if(|cursor| cursor.char_index <= operation.start_index()) - { - right_merge_context.consume_last_operation_if_it_is_too_behind( - cursor.char_index as i64, - ); - merged_cursors.push(cursor.apply_merge_context(&right_merge_context)); - } - - while let Some(cursor) = right_cursors.next_if(|cursor| { - cursor.char_index as i64 - <= operation.start_index() as i64 + right_merge_context.shift - - left_merge_context.shift - }) { - left_merge_context.consume_last_operation_if_it_is_too_behind( - cursor.char_index as i64, - ); - merged_cursors.push(cursor.apply_merge_context(&left_merge_context)); - } - - operation.merge_operations_with_context( + let result = operation.merge_operations_with_context( &mut right_merge_context, &mut left_merge_context, - ) + ); + + if let Some(ref op @ Operation::Insert { .. }) + | Some(ref op @ Operation::Equal { .. }) = result + { + while let Some(mut cursor) = + left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) + { + let shift = op.start_index() as i64 - original_start; + + cursor.char_index = (op.start_index() as i64) + .max(cursor.char_index as i64 + shift) + as usize; + merged_cursors.push(cursor); + } + } + + result } Side::Right => { - while let Some(cursor) = right_cursors - .next_if(|cursor| cursor.char_index <= operation.start_index()) - { - left_merge_context.consume_last_operation_if_it_is_too_behind( - cursor.char_index as i64, - ); - merged_cursors.push(cursor.apply_merge_context(&left_merge_context)); - } - - while let Some(cursor) = left_cursors.next_if(|cursor| { - cursor.char_index as i64 - <= operation.start_index() as i64 + left_merge_context.shift - - right_merge_context.shift - }) { - right_merge_context.consume_last_operation_if_it_is_too_behind( - cursor.char_index as i64, - ); - merged_cursors.push(cursor.apply_merge_context(&right_merge_context)); - } - - operation.merge_operations_with_context( + let result = operation.merge_operations_with_context( &mut left_merge_context, &mut right_merge_context, - ) + ); + + if let Some(ref op @ Operation::Insert { .. }) + | Some(ref op @ Operation::Equal { .. }) = result + { + while let Some(mut cursor) = right_cursors + .next_if(|cursor| cursor.char_index <= original_end + 1) + { + let shift = op.start_index() as i64 - original_start; + + cursor.char_index = (op.start_index() as i64) + .max(cursor.char_index as i64 + shift) + as usize; + merged_cursors.push(cursor); + } + } + + result } } .map(|operation| OrderedOperation { order, operation }) @@ -300,15 +307,14 @@ where }) .collect(); - for cursor in left_cursors { - right_merge_context - .consume_last_operation_if_it_is_too_behind(cursor.char_index as i64); - merged_cursors.push(cursor.apply_merge_context(&right_merge_context)); - } + let last_index = merged_operations + .iter() + .last() + .map(|op| op.operation.end_index()) + .unwrap_or(0); - for cursor in right_cursors { - left_merge_context.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64); - merged_cursors.push(cursor.apply_merge_context(&left_merge_context)); + for cursor in left_cursors.chain(right_cursors) { + merged_cursors.push(cursor.with_index(last_index)); } Self::new(self.text, merged_operations, merged_cursors) @@ -331,6 +337,7 @@ where mod tests { use std::env; + use insta::assert_debug_snapshot; use pretty_assertions::assert_eq; use super::*; @@ -354,10 +361,9 @@ mod tests { let operations = EditedText::from_strings(text, text.into()); - assert_eq!(operations.operations.len(), 0); + assert_debug_snapshot!(operations); let new_right = operations.apply(); - assert_eq!(new_right.to_string(), text); } diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index c25871b..313d368 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -20,6 +20,14 @@ pub enum Operation where T: PartialEq + Clone + std::fmt::Debug, { + Equal { + index: usize, + length: usize, + + #[cfg(debug_assertions)] + text: Option, + }, + Insert { index: usize, text: Vec>, @@ -38,6 +46,37 @@ impl Operation where T: PartialEq + Clone + std::fmt::Debug, { + /// Creates an equal operation with the given index. + /// This operation is used to indicate that the text at the given index + /// is unchanged. + pub fn create_equal(index: usize, length: usize) -> Option { + if length == 0 { + return None; + } + + Some(Operation::Equal { + index, + length, + + #[cfg(debug_assertions)] + text: None, + }) + } + + pub fn create_equal_with_text(index: usize, text: String) -> Option { + if text.is_empty() { + return None; + } + + Some(Operation::Equal { + index, + length: text.chars().count(), + + #[cfg(debug_assertions)] + text: Some(text), + }) + } + /// Creates an insert operation with the given index and text. /// If the text is empty (meaning that the operation would be a no-op), /// returns None. @@ -87,6 +126,20 @@ where /// on a range of text that does not match the text to be deleted. pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> { match self { + Operation::Equal { + #[cfg(debug_assertions)] + text, + .. + } => { + #[cfg(debug_assertions)] + debug_assert!( + text.as_ref() + .is_none_or(|text| builder.get_slice(self.range()) == *text), + "Text which is supposed to be equal does not match the text in the range" + ); + + return builder; + } Operation::Insert { text, .. } => builder.insert( self.start_index(), &text.iter().map(Token::original).collect::(), @@ -114,7 +167,9 @@ where /// Returns the index of the first character that the operation affects. pub fn start_index(&self) -> usize { match self { - Operation::Insert { index, .. } | Operation::Delete { index, .. } => *index, + Operation::Equal { index, .. } + | Operation::Insert { index, .. } + | Operation::Delete { index, .. } => *index, } } @@ -135,6 +190,7 @@ where /// because empty operations cannot be created. pub fn len(&self) -> usize { match self { + Operation::Equal { length, .. } => *length, Operation::Insert { text, .. } => text.iter().map(Token::get_original_length).sum(), Operation::Delete { deleted_character_count, @@ -147,6 +203,19 @@ where /// index. pub fn with_index(self, index: usize) -> Self { match self { + Operation::Equal { + length, + + #[cfg(debug_assertions)] + text, + .. + } => Operation::Equal { + index, + length, + + #[cfg(debug_assertions)] + text, + }, Operation::Insert { text, .. } => Operation::Insert { index, text }, Operation::Delete { deleted_character_count, @@ -191,7 +260,7 @@ where let operation = self.with_shifted_index(affecting_context.shift); match (operation, affecting_context.last_operation()) { - (operation @ Operation::Insert { .. }, None) => { + (operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => { produced_context.shift += operation.len() as i64; produced_context.consume_and_replace_last_operation(Some(operation.clone())); Some(operation) @@ -227,7 +296,10 @@ where trimmed_operation } - (operation @ Operation::Delete { .. }, None | Some(Operation::Insert { .. })) => { + ( + operation @ Operation::Delete { .. }, + None | Some(Operation::Insert { .. }) | Some(Operation::Equal { .. }), + ) => { produced_context.consume_and_replace_last_operation(Some(operation.clone())); Some(operation) } @@ -286,6 +358,41 @@ where updated_delete } + ( + ref operation @ Operation::Equal { + length, + #[cfg(debug_assertions)] + ref text, + .. + }, + Some(last_delete @ Operation::Delete { .. }), + ) => { + debug_assert!( + last_delete.range().contains(&operation.start_index()), + "There is a last delete ({last_delete}) but the operation ({operation}) is \ + not contained in it" + ); + + let overlap = (length as i64) + .min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1); + + if cfg!(debug_assertions) && text.is_some() { + Operation::create_equal_with_text( + operation.end_index().min(last_delete.end_index()), + text.clone() + .unwrap() + .chars() + .skip(overlap as usize) + .collect::(), + ) + } else { + Operation::create_equal( + operation.end_index().min(last_delete.end_index()), + (length as i64 - overlap) as usize, + ) + } + } + (operation @ Operation::Equal { .. }, _) => Some(operation), } } } @@ -296,6 +403,28 @@ where { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { + Operation::Equal { + index, + length, + + #[cfg(debug_assertions)] + text, + } => { + #[cfg(debug_assertions)] + write!( + f, + "", + text.as_ref() + .map(|text| format!("'{text}'")) + .unwrap_or(format!("{length} characters")), + index + )?; + + #[cfg(not(debug_assertions))] + write!(f, "")?; + + Ok(()) + } Operation::Insert { index, text } => { write!( f, diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index f08083f..246b2fe 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -14,6 +14,22 @@ EditedText { order: 0, operation: , }, + OrderedOperation { + order: 12, + operation: , + }, + OrderedOperation { + order: 13, + operation: , + }, + OrderedOperation { + order: 16, + operation: , + }, + OrderedOperation { + order: 17, + operation: , + }, OrderedOperation { order: 20, operation: , diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap new file mode 100644 index 0000000..33414f8 --- /dev/null +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap @@ -0,0 +1,23 @@ +--- +source: reconcile/src/operation_transformation/edited_text.rs +expression: operations +snapshot_kind: text +--- +EditedText { + text: "hello world!", + operations: [ + OrderedOperation { + order: 0, + operation: , + }, + OrderedOperation { + order: 5, + operation: , + }, + OrderedOperation { + order: 6, + operation: , + }, + ], + cursors: [], +} diff --git a/backend/reconcile/src/tokenizer/word_tokenizer.rs b/backend/reconcile/src/tokenizer/word_tokenizer.rs index 37d748b..8ebff6a 100644 --- a/backend/reconcile/src/tokenizer/word_tokenizer.rs +++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs @@ -1,29 +1,39 @@ use super::token::Token; -/// Splits on whitespace keeping the leading whitespace. +/// Splits on word boundaries creating alternating words and whitespaces with +/// the whitesspaces getting unique IDs. /// -/// /// ## Example /// -/// "Hi there!" -> ["Hi", " there!"] +/// "Hi there!" -> ["Hi", " " ", "there!"] pub fn word_tokenizer(text: &str) -> Vec> { let mut result: Vec> = Vec::new(); - let mut last_whitespace = 0; - let mut previous_char_is_whitespace = true; + let mut previous_boundary_index = 0; + let mut previous_char_is_whitespace = text.chars().next().map_or(true, |c| c.is_whitespace()); for (i, c) in text.char_indices() { let is_current_char_whitespace = c.is_whitespace(); - if !previous_char_is_whitespace && is_current_char_whitespace { - result.push(text[last_whitespace..i].into()); - last_whitespace = i; + if previous_char_is_whitespace != is_current_char_whitespace { + result.push(text[previous_boundary_index..i].into()); + previous_boundary_index = i; } previous_char_is_whitespace = is_current_char_whitespace; } - if last_whitespace < text.len() { - result.push(text[last_whitespace..].into()); + if previous_boundary_index < text.len() { + result.push(text[previous_boundary_index..].into()); + } + + if result.is_empty() { + return result; + } + + for i in 0..result.len() - 1 { + if result[i].original().chars().all(|c| c.is_whitespace()) { + result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original() + } } result