Fix cursor moving

2025-04-06 11:39:30 +01:00 · 2025-04-06 11:39:30 +01:00 · 9da05c6ff6
commit 9da05c6ff6
parent f509a60f0a
7 changed files with 269 additions and 94 deletions
--- a/backend/reconcile/src/operation_transformation.rs
+++ b/backend/reconcile/src/operation_transformation.rs
@ -110,8 +110,8 @@ mod test {
                    }, // inside of "s|ample" because "text" got replaced by "sample"
                    CursorPosition {
                        id: 3,
-                        char_index: 31
-                    }, // before "for"
+                        char_index: 43
+                    }, // before "cursor movements"
                ]
            )
        );
--- a/backend/reconcile/src/operation_transformation/cursor.rs
+++ b/backend/reconcile/src/operation_transformation/cursor.rs
@ -16,19 +16,10 @@ pub struct CursorPosition {
 }

 impl CursorPosition {
-    #[must_use]
-    pub fn apply_merge_context<T>(&self, context: &MergeContext<T>) -> Self
-    where
-        T: PartialEq + Clone + std::fmt::Debug,
-    {
-        let char_index = match context.last_operation() {
-            Some(Operation::Delete { index, .. }) => (*index) as i64,
-            _ => self.char_index as i64 + context.shift,
-        };
-
+    pub fn with_index(self, index: usize) -> Self {
        CursorPosition {
            id: self.id,
-            char_index: char_index.max(0) as usize,
+            char_index: index,
        }
    }
 }
--- a/backend/reconcile/src/operation_transformation/edited_text.rs
+++ b/backend/reconcile/src/operation_transformation/edited_text.rs
@ -3,11 +3,11 @@ use core::iter;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};

-use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
+use super::{ordered_operation::OrderedOperation, CursorPosition, Operation, TextWithCursors};
 use crate::{
    diffs::{myers::diff, raw_operation::RawOperation},
-    operation_transformation::merge_context::MergeContext,
-    tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
+    operation_transformation::{merge_context::MergeContext, operation},
+    tokenizer::{word_tokenizer::word_tokenizer, Tokenizer},
    utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
 };

@ -72,6 +72,10 @@ where
    where
        I: IntoIterator<Item = RawOperation<T>>,
    {
+        // This might look bad, but this makes sense. The inserts and deltes can be
+        // interleaved, such as: IDIDID and we need to turn this into IIIDDD.
+        // So we need to keep track of both the last insert and delete operations, not
+        // just the last one.
        let mut maybe_previous_insert: Option<RawOperation<T>> = None;
        let mut maybe_previous_delete: Option<RawOperation<T>> = None;

@ -132,12 +136,22 @@ where
        raw_operations.into_iter().flat_map(move |raw_operation| {
            let length = raw_operation.original_text_length();

-            let operation = match raw_operation {
+            match raw_operation {
                RawOperation::Equal(..) => {
+                    let op = if cfg!(debug_assertions) {
+                        Operation::create_equal_with_text(
+                            new_index,
+                            raw_operation.get_original_text(),
+                        )
+                    } else {
+                        Operation::create_equal(new_index, length)
+                    }
+                    .map(|operation| OrderedOperation { order, operation });
+
                    new_index += length;
                    order += length;

-                    None
+                    op
                }
                RawOperation::Insert(tokens) => {
                    let op = Operation::create_insert(new_index, tokens)
@ -162,9 +176,7 @@ where

                    op
                }
-            };
-
-            operation.into_iter()
+            }
        })
    }

@ -208,10 +220,10 @@ where
        let mut right_merge_context = MergeContext::default();

        let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
-        let mut left_cursors = self.cursors.iter().peekable();
-        let mut right_cursors = other.cursors.iter().peekable();
+        let mut left_cursors = self.cursors.into_iter().peekable();
+        let mut right_cursors = other.cursors.into_iter().peekable();

-        let merged_operations = self
+        let merged_operations: Vec<OrderedOperation<T>> = self
            .operations
            .into_iter()
            // The current text is always the left; the other operation is the right side.
@ -221,13 +233,11 @@ where
                |(operation, _)| {
                    (
                        operation.order,
-                        // Operations on the left and right must come in the same order so that
-                        // inserts can be merged with other inserts and deletes with deletes.
-                        usize::from(matches!(operation.operation, Operation::Delete { .. })),
                        operation.operation.start_index(),
                        // Make sure that the ordering is deterministic regardless which text
                        // is left or right.
                        match &operation.operation {
+                            Operation::Equal { index, .. } => index.to_string(),
                            Operation::Insert { text, .. } => text
                                .iter()
                                .map(super::super::tokenizer::token::Token::original)
@ -241,58 +251,55 @@ where
                },
            )
            .flat_map(|(OrderedOperation { order, operation }, side)| {
+                let original_start = operation.start_index() as i64;
+                let original_end = operation.end_index();
+
                match side {
                    Side::Left => {
-                        while let Some(cursor) = left_cursors
-                            .next_if(|cursor| cursor.char_index <= operation.start_index())
-                        {
-                            right_merge_context.consume_last_operation_if_it_is_too_behind(
-                                cursor.char_index as i64,
-                            );
-                            merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
-                        }
-
-                        while let Some(cursor) = right_cursors.next_if(|cursor| {
-                            cursor.char_index as i64
-                                <= operation.start_index() as i64 + right_merge_context.shift
-                                    - left_merge_context.shift
-                        }) {
-                            left_merge_context.consume_last_operation_if_it_is_too_behind(
-                                cursor.char_index as i64,
-                            );
-                            merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
-                        }
-
-                        operation.merge_operations_with_context(
+                        let result = operation.merge_operations_with_context(
                            &mut right_merge_context,
                            &mut left_merge_context,
-                        )
+                        );
+
+                        if let Some(ref op @ Operation::Insert { .. })
+                        | Some(ref op @ Operation::Equal { .. }) = result
+                        {
+                            while let Some(mut cursor) =
+                                left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
+                            {
+                                let shift = op.start_index() as i64 - original_start;
+
+                                cursor.char_index = (op.start_index() as i64)
+                                    .max(cursor.char_index as i64 + shift)
+                                    as usize;
+                                merged_cursors.push(cursor);
+                            }
+                        }
+
+                        result
                    }
                    Side::Right => {
-                        while let Some(cursor) = right_cursors
-                            .next_if(|cursor| cursor.char_index <= operation.start_index())
-                        {
-                            left_merge_context.consume_last_operation_if_it_is_too_behind(
-                                cursor.char_index as i64,
-                            );
-                            merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
-                        }
-
-                        while let Some(cursor) = left_cursors.next_if(|cursor| {
-                            cursor.char_index as i64
-                                <= operation.start_index() as i64 + left_merge_context.shift
-                                    - right_merge_context.shift
-                        }) {
-                            right_merge_context.consume_last_operation_if_it_is_too_behind(
-                                cursor.char_index as i64,
-                            );
-                            merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
-                        }
-
-                        operation.merge_operations_with_context(
+                        let result = operation.merge_operations_with_context(
                            &mut left_merge_context,
                            &mut right_merge_context,
-                        )
+                        );
+
+                        if let Some(ref op @ Operation::Insert { .. })
+                        | Some(ref op @ Operation::Equal { .. }) = result
+                        {
+                            while let Some(mut cursor) = right_cursors
+                                .next_if(|cursor| cursor.char_index <= original_end + 1)
+                            {
+                                let shift = op.start_index() as i64 - original_start;
+
+                                cursor.char_index = (op.start_index() as i64)
+                                    .max(cursor.char_index as i64 + shift)
+                                    as usize;
+                                merged_cursors.push(cursor);
+                            }
+                        }
+
+                        result
                    }
                }
                .map(|operation| OrderedOperation { order, operation })
@ -300,15 +307,14 @@ where
            })
            .collect();

-        for cursor in left_cursors {
-            right_merge_context
-                .consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
-            merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
-        }
+        let last_index = merged_operations
+            .iter()
+            .last()
+            .map(|op| op.operation.end_index())
+            .unwrap_or(0);

-        for cursor in right_cursors {
-            left_merge_context.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
-            merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
+        for cursor in left_cursors.chain(right_cursors) {
+            merged_cursors.push(cursor.with_index(last_index));
        }

        Self::new(self.text, merged_operations, merged_cursors)
@ -331,6 +337,7 @@ where
 mod tests {
    use std::env;

+    use insta::assert_debug_snapshot;
    use pretty_assertions::assert_eq;

    use super::*;
@ -354,10 +361,9 @@ mod tests {

        let operations = EditedText::from_strings(text, text.into());

-        assert_eq!(operations.operations.len(), 0);
+        assert_debug_snapshot!(operations);

        let new_right = operations.apply();
-
        assert_eq!(new_right.to_string(), text);
    }

--- a/backend/reconcile/src/operation_transformation/operation.rs
+++ b/backend/reconcile/src/operation_transformation/operation.rs
@ -20,6 +20,14 @@ pub enum Operation<T>
 where
    T: PartialEq + Clone + std::fmt::Debug,
 {
+    Equal {
+        index: usize,
+        length: usize,
+
+        #[cfg(debug_assertions)]
+        text: Option<String>,
+    },
+
    Insert {
        index: usize,
        text: Vec<Token<T>>,
@ -38,6 +46,37 @@ impl<T> Operation<T>
 where
    T: PartialEq + Clone + std::fmt::Debug,
 {
+    /// Creates an equal operation with the given index.
+    /// This operation is used to indicate that the text at the given index
+    /// is unchanged.
+    pub fn create_equal(index: usize, length: usize) -> Option<Self> {
+        if length == 0 {
+            return None;
+        }
+
+        Some(Operation::Equal {
+            index,
+            length,
+
+            #[cfg(debug_assertions)]
+            text: None,
+        })
+    }
+
+    pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
+        if text.is_empty() {
+            return None;
+        }
+
+        Some(Operation::Equal {
+            index,
+            length: text.chars().count(),
+
+            #[cfg(debug_assertions)]
+            text: Some(text),
+        })
+    }
+
    /// Creates an insert operation with the given index and text.
    /// If the text is empty (meaning that the operation would be a no-op),
    /// returns None.
@ -87,6 +126,20 @@ where
    /// on a range of text that does not match the text to be deleted.
    pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
        match self {
+            Operation::Equal {
+                #[cfg(debug_assertions)]
+                text,
+                ..
+            } => {
+                #[cfg(debug_assertions)]
+                debug_assert!(
+                    text.as_ref()
+                        .is_none_or(|text| builder.get_slice(self.range()) == *text),
+                    "Text which is supposed to be equal does not match the text in the range"
+                );
+
+                return builder;
+            }
            Operation::Insert { text, .. } => builder.insert(
                self.start_index(),
                &text.iter().map(Token::original).collect::<String>(),
@ -114,7 +167,9 @@ where
    /// Returns the index of the first character that the operation affects.
    pub fn start_index(&self) -> usize {
        match self {
-            Operation::Insert { index, .. } | Operation::Delete { index, .. } => *index,
+            Operation::Equal { index, .. }
+            | Operation::Insert { index, .. }
+            | Operation::Delete { index, .. } => *index,
        }
    }

@ -135,6 +190,7 @@ where
    /// because empty operations cannot be created.
    pub fn len(&self) -> usize {
        match self {
+            Operation::Equal { length, .. } => *length,
            Operation::Insert { text, .. } => text.iter().map(Token::get_original_length).sum(),
            Operation::Delete {
                deleted_character_count,
@ -147,6 +203,19 @@ where
    /// index.
    pub fn with_index(self, index: usize) -> Self {
        match self {
+            Operation::Equal {
+                length,
+
+                #[cfg(debug_assertions)]
+                text,
+                ..
+            } => Operation::Equal {
+                index,
+                length,
+
+                #[cfg(debug_assertions)]
+                text,
+            },
            Operation::Insert { text, .. } => Operation::Insert { index, text },
            Operation::Delete {
                deleted_character_count,
@ -191,7 +260,7 @@ where
        let operation = self.with_shifted_index(affecting_context.shift);

        match (operation, affecting_context.last_operation()) {
-            (operation @ Operation::Insert { .. }, None) => {
+            (operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
                produced_context.shift += operation.len() as i64;
                produced_context.consume_and_replace_last_operation(Some(operation.clone()));
                Some(operation)
@ -227,7 +296,10 @@ where
                trimmed_operation
            }

-            (operation @ Operation::Delete { .. }, None | Some(Operation::Insert { .. })) => {
+            (
+                operation @ Operation::Delete { .. },
+                None | Some(Operation::Insert { .. }) | Some(Operation::Equal { .. }),
+            ) => {
                produced_context.consume_and_replace_last_operation(Some(operation.clone()));
                Some(operation)
            }
@ -286,6 +358,41 @@ where

                updated_delete
            }
+            (
+                ref operation @ Operation::Equal {
+                    length,
+                    #[cfg(debug_assertions)]
+                    ref text,
+                    ..
+                },
+                Some(last_delete @ Operation::Delete { .. }),
+            ) => {
+                debug_assert!(
+                    last_delete.range().contains(&operation.start_index()),
+                    "There is a last delete ({last_delete}) but the operation ({operation}) is \
+                     not contained in it"
+                );
+
+                let overlap = (length as i64)
+                    .min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
+
+                if cfg!(debug_assertions) && text.is_some() {
+                    Operation::create_equal_with_text(
+                        operation.end_index().min(last_delete.end_index()),
+                        text.clone()
+                            .unwrap()
+                            .chars()
+                            .skip(overlap as usize)
+                            .collect::<String>(),
+                    )
+                } else {
+                    Operation::create_equal(
+                        operation.end_index().min(last_delete.end_index()),
+                        (length as i64 - overlap) as usize,
+                    )
+                }
+            }
+            (operation @ Operation::Equal { .. }, _) => Some(operation),
        }
    }
 }
@ -296,6 +403,28 @@ where
 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
+            Operation::Equal {
+                index,
+                length,
+
+                #[cfg(debug_assertions)]
+                text,
+            } => {
+                #[cfg(debug_assertions)]
+                write!(
+                    f,
+                    "<equal {} from index {}>",
+                    text.as_ref()
+                        .map(|text| format!("'{text}'"))
+                        .unwrap_or(format!("{length} characters")),
+                    index
+                )?;
+
+                #[cfg(not(debug_assertions))]
+                write!(f, "<equal {length} from index {index}>")?;
+
+                Ok(())
+            }
            Operation::Insert { index, text } => {
                write!(
                    f,
--- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations.snap
+++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations.snap
@ -14,6 +14,22 @@ EditedText {
            order: 0,
            operation: <delete 'hello world!' from index 17>,
        },
+        OrderedOperation {
+            order: 12,
+            operation: <equal ' ' from index 17>,
+        },
+        OrderedOperation {
+            order: 13,
+            operation: <equal 'How' from index 18>,
+        },
+        OrderedOperation {
+            order: 16,
+            operation: <equal ' ' from index 21>,
+        },
+        OrderedOperation {
+            order: 17,
+            operation: <equal 'are' from index 22>,
+        },
        OrderedOperation {
            order: 20,
            operation: <insert ' you doing? Albert' from index 25>,
--- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations_with_no_diff.snap
+++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations_with_no_diff.snap
@ -0,0 +1,23 @@
+---
+source: reconcile/src/operation_transformation/edited_text.rs
+expression: operations
+snapshot_kind: text
+---
+EditedText {
+    text: "hello world!",
+    operations: [
+        OrderedOperation {
+            order: 0,
+            operation: <equal 'hello' from index 0>,
+        },
+        OrderedOperation {
+            order: 5,
+            operation: <equal ' ' from index 5>,
+        },
+        OrderedOperation {
+            order: 6,
+            operation: <equal 'world!' from index 6>,
+        },
+    ],
+    cursors: [],
+}
--- a/backend/reconcile/src/tokenizer/word_tokenizer.rs
+++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs
@ -1,29 +1,39 @@
 use super::token::Token;

-/// Splits on whitespace keeping the leading whitespace.
+/// Splits on word boundaries creating alternating words and whitespaces with
+/// the whitesspaces getting unique IDs.
 ///
-///     
 /// ## Example
 ///
-/// "Hi there!" -> ["Hi", " there!"]
+/// "Hi there!" -> ["Hi", " " ", "there!"]
 pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
    let mut result: Vec<Token<String>> = Vec::new();

-    let mut last_whitespace = 0;
-    let mut previous_char_is_whitespace = true;
+    let mut previous_boundary_index = 0;
+    let mut previous_char_is_whitespace = text.chars().next().map_or(true, |c| c.is_whitespace());

    for (i, c) in text.char_indices() {
        let is_current_char_whitespace = c.is_whitespace();
-        if !previous_char_is_whitespace && is_current_char_whitespace {
-            result.push(text[last_whitespace..i].into());
-            last_whitespace = i;
+        if previous_char_is_whitespace != is_current_char_whitespace {
+            result.push(text[previous_boundary_index..i].into());
+            previous_boundary_index = i;
        }

        previous_char_is_whitespace = is_current_char_whitespace;
    }

-    if last_whitespace < text.len() {
-        result.push(text[last_whitespace..].into());
+    if previous_boundary_index < text.len() {
+        result.push(text[previous_boundary_index..].into());
+    }
+
+    if result.is_empty() {
+        return result;
+    }
+
+    for i in 0..result.len() - 1 {
+        if result[i].original().chars().all(|c| c.is_whitespace()) {
+            result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original()
+        }
    }

    result