Elongate equals too

Remove clutter
Use loop instead of iter
2025-06-15 10:23:14 +01:00 · 2025-06-14 14:25:36 +01:00 · 2025-06-14 14:24:48 +01:00 · 2025-06-14 12:04:19 +01:00 · 2025-06-14 12:01:34 +01:00 · 2025-06-14 11:44:20 +01:00
17 changed files with 359 additions and 317 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -5,5 +5,6 @@
        "**/dist": true,
        "**/node_modules": true,
        "**/.sqlx": true,
+        "**/snapshots": true,
    }
 }
--- a/backend/reconcile/src/diffs/myers.rs
+++ b/backend/reconcile/src/diffs/myers.rs
@ -30,7 +30,7 @@ use crate::{
    utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
 };

-/// Myers' diff algorithm with deadline.
+/// Myers' diff algorithm.
 ///
 /// Diff `old`, between indices `old_range` and `new` between indices
 /// `new_range`.
--- a/backend/reconcile/src/diffs/raw_operation.rs
+++ b/backend/reconcile/src/diffs/raw_operation.rs
@ -30,12 +30,12 @@ where

    pub fn is_left_joinable(&self) -> bool {
        let first_token = self.tokens().first();
-        first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
+        first_token.is_none_or(|token| token.is_left_joinable)
    }

    pub fn is_right_joinable(&self) -> bool {
        let last_token = self.tokens().last();
-        last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
+        last_token.is_none_or(|token| token.is_right_joinable)
    }

    /// Extends the operation with another operation. Only operations of the
@ -49,8 +49,8 @@ where
        );

        match (self, other) {
-            (RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
-                RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
+            (RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
+                RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
            }
            (RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
                RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
--- a/backend/reconcile/src/lib.rs
+++ b/backend/reconcile/src/lib.rs
@ -7,4 +7,4 @@ pub use operation_transformation::{
    CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
    reconcile_with_tokenizer,
 };
-pub use tokenizer::{Tokenizer, token::Token};
+pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer};
--- a/backend/reconcile/src/operation_transformation.rs
+++ b/backend/reconcile/src/operation_transformation.rs
@ -3,6 +3,7 @@ mod edited_text;
 mod merge_context;
 mod operation;
 mod ordered_operation;
+mod utils;

 pub use cursor::{CursorPosition, TextWithCursors};
 pub use edited_text::EditedText;
--- a/backend/reconcile/src/operation_transformation/edited_text.rs
+++ b/backend/reconcile/src/operation_transformation/edited_text.rs
@ -1,23 +1,29 @@
-use core::iter;
-
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};

 use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
 use crate::{
    diffs::{myers::diff, raw_operation::RawOperation},
-    operation_transformation::merge_context::MergeContext,
+    operation_transformation::{
+        merge_context::MergeContext,
+        utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
+    },
    tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
-    utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
+    utils::{side::Side, string_builder::StringBuilder},
 };

-/// A sequence of operations that can be applied to a text document.
-/// `EditedText` supports merging two sequences of operations using the
-/// principle of Operational Transformation.
+/// A text document and a sequence of operations that can be applied to the text
+/// document. `EditedText` supports merging two sequences of operations using
+/// the principles of Operational Transformation.
 ///
 /// It's mainly created through the `from_strings` method, then merged with
 /// another `EditedText` derived from the same original text and then applied to
 /// the original text to get the reconciled text of concurrent edits.
+///
+/// In addition to text and operations, it also keeps track of cursor positions
+/// in the original text. These positions are updated when the operations are
+/// applied, so that each cursor can be restored at the corresponding location
+/// in the updated text.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct EditedText<'a, T>
@ -63,123 +69,11 @@ where

        Self::new(
            original,
-            Self::cook_operations(Self::elongate_operations(diff)).collect(),
+            cook_operations(elongate_operations(diff)).collect(),
            updated.cursors,
        )
    }

-    fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
-    where
-        I: IntoIterator<Item = RawOperation<T>>,
-    {
-        // This might look bad, but this makes sense. The inserts and deltes can be
-        // interleaved, such as: IDIDID and we need to turn this into IIIDDD.
-        // So we need to keep track of both the last insert and delete operations, not
-        // just the last one.
-        let mut maybe_previous_insert: Option<RawOperation<T>> = None;
-        let mut maybe_previous_delete: Option<RawOperation<T>> = None;
-
-        let mut result: Vec<RawOperation<T>> = raw_operations
-            .into_iter()
-            .flat_map(|next| match next {
-                RawOperation::Insert(..) => match maybe_previous_insert.take() {
-                    Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
-                        maybe_previous_insert = Some(prev.extend(next));
-                        Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
-                    }
-                    prev => {
-                        maybe_previous_insert = Some(next);
-                        Box::new(prev.into_iter())
-                    }
-                },
-                RawOperation::Delete(..) => match maybe_previous_delete.take() {
-                    Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
-                        maybe_previous_delete = Some(prev.extend(next));
-                        Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
-                    }
-                    prev => {
-                        maybe_previous_delete = Some(next);
-                        Box::new(prev.into_iter())
-                    }
-                },
-                RawOperation::Equal(..) => Box::new(
-                    maybe_previous_insert
-                        .take()
-                        .into_iter()
-                        .chain(maybe_previous_delete.take())
-                        .chain(iter::once(next)),
-                )
-                    as Box<dyn Iterator<Item = RawOperation<T>>>,
-            })
-            .collect();
-
-        if let Some(prev) = maybe_previous_insert {
-            result.push(prev);
-        }
-
-        if let Some(prev) = maybe_previous_delete {
-            result.push(prev);
-        }
-
-        result
-    }
-
-    // Turn raw operations into ordered operations while keeping track of old & new
-    // indexes.
-    fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
-    where
-        I: IntoIterator<Item = RawOperation<T>>,
-    {
-        let mut new_index = 0; // this is the start index of the operation on the new text
-        let mut order = 0; // this is the start index of the operation on the original text
-
-        raw_operations.into_iter().filter_map(move |raw_operation| {
-            let length = raw_operation.original_text_length();
-
-            match raw_operation {
-                RawOperation::Equal(..) => {
-                    let op = if cfg!(debug_assertions) {
-                        Operation::create_equal_with_text(
-                            new_index,
-                            raw_operation.get_original_text(),
-                        )
-                    } else {
-                        Operation::create_equal(new_index, length)
-                    }
-                    .map(|operation| OrderedOperation { order, operation });
-
-                    new_index += length;
-                    order += length;
-
-                    op
-                }
-                RawOperation::Insert(tokens) => {
-                    let op = Operation::create_insert(new_index, tokens)
-                        .map(|operation| OrderedOperation { order, operation });
-
-                    new_index += length;
-
-                    op
-                }
-                RawOperation::Delete(..) => {
-                    let op = if cfg!(debug_assertions) {
-                        Operation::create_delete_with_text(
-                            new_index,
-                            raw_operation.get_original_text(),
-                        )
-                    } else {
-                        Operation::create_delete(new_index, length)
-                    }
-                    .map(|operation| OrderedOperation { order, operation });
-
-                    order += length;
-
-                    op
-                }
-            }
-        })
-    }
-
    /// Create a new `EditedText` with the given operations.
    /// The operations must be in the order in which they are meant to be
    /// applied. The operations must not overlap.
@ -223,82 +117,84 @@ where
        let mut left_cursors = self.cursors.into_iter().peekable();
        let mut right_cursors = other.cursors.into_iter().peekable();

-        let merged_operations: Vec<OrderedOperation<T>> = self
-            .operations
-            .into_iter()
-            // The current text is always the left; the other operation is the right side.
-            .map(|op| (op, Side::Left))
-            .merge_sorted_by_key(
-                other.operations.into_iter().map(|op| (op, Side::Right)),
-                |(operation, _)| {
-                    (
-                        operation.order,
-                        operation.operation.start_index(),
-                        // Make sure that the ordering is deterministic regardless which text
-                        // is left or right.
-                        match &operation.operation {
-                            Operation::Equal { index, .. } => index.to_string(),
-                            Operation::Insert { text, .. } => text
-                                .iter()
-                                .map(crate::tokenizer::token::Token::original)
-                                .collect::<String>(),
-                            Operation::Delete {
-                                deleted_character_count,
-                                ..
-                            } => deleted_character_count.to_string(),
-                        },
-                    )
-                },
-            )
-            .flat_map(|(OrderedOperation { order, operation }, side)| {
-                let original_start = operation.start_index() as i64;
-                let original_end = operation.end_index();
-                let original_length = operation.len() as i64;
+        let mut merged_operations: Vec<OrderedOperation<T>> =
+            Vec::with_capacity(self.operations.len() + other.operations.len());

-                let result = match side {
-                    Side::Left => operation.merge_operations_with_context(
-                        &mut right_merge_context,
-                        &mut left_merge_context,
-                    ),
-                    Side::Right => operation.merge_operations_with_context(
-                        &mut left_merge_context,
-                        &mut right_merge_context,
-                    ),
+        let mut left_iter = self.operations.into_iter();
+        let mut right_iter = other.operations.into_iter();
+
+        let mut maybe_left_op = left_iter.next();
+        let mut maybe_right_op = right_iter.next();
+
+        loop {
+            let (side, OrderedOperation { operation, order }) =
+                match (maybe_left_op.clone(), maybe_right_op.clone()) {
+                    (Some(left_op), Some(right_op)) => {
+                        if left_op < right_op {
+                            (Side::Left, left_op)
+                        } else {
+                            (Side::Right, right_op)
+                        }
+                    }
+
+                    (Some(left_op), None) => (Side::Left, left_op),
+                    (None, Some(right_op)) => (Side::Right, right_op),
+                    (None, None) => break,
                };

-                if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result
-                {
-                    let shift = op.start_index() as i64 - original_start + op.len() as i64
-                        - original_length;
-                    match side {
-                        Side::Left => {
-                            while let Some(cursor) =
-                                left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
-                            {
-                                merged_cursors.push(cursor.with_index(
-                                    (op.start_index() as i64).max(cursor.char_index as i64 + shift)
-                                        as usize,
-                                ));
-                            }
+            if side == Side::Left {
+                maybe_left_op = left_iter.next();
+            } else {
+                maybe_right_op = right_iter.next();
+            }
+
+            let original_start = operation.start_index() as i64;
+            let original_end = operation.end_index();
+            let original_length = operation.len() as i64;
+
+            let result = match side {
+                Side::Left => operation.merge_operations_with_context(
+                    &mut right_merge_context,
+                    &mut left_merge_context,
+                ),
+                Side::Right => operation.merge_operations_with_context(
+                    &mut left_merge_context,
+                    &mut right_merge_context,
+                ),
+            };
+
+            if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result {
+                let shift =
+                    op.start_index() as i64 - original_start + op.len() as i64 - original_length;
+                match side {
+                    Side::Left => {
+                        while let Some(cursor) =
+                            left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
+                        {
+                            merged_cursors.push(cursor.with_index(
+                                (op.start_index() as i64).max(cursor.char_index as i64 + shift)
+                                    as usize,
+                            ));
                        }
-                        Side::Right => {
-                            while let Some(cursor) = right_cursors
-                                .next_if(|cursor| cursor.char_index <= original_end + 1)
-                            {
-                                merged_cursors.push(cursor.with_index(
-                                    (op.start_index() as i64).max(cursor.char_index as i64 + shift)
-                                        as usize,
-                                ));
-                            }
+                    }
+                    Side::Right => {
+                        while let Some(cursor) =
+                            right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
+                        {
+                            merged_cursors.push(cursor.with_index(
+                                (op.start_index() as i64).max(cursor.char_index as i64 + shift)
+                                    as usize,
+                            ));
                        }
                    }
                }
+            }

-                result
-                    .map(|operation| OrderedOperation { order, operation })
-                    .into_iter()
-            })
-            .collect();
+            merged_operations.extend(result.into_iter().map(|op| OrderedOperation {
+                order,
+                operation: op,
+            }));
+        }

        let last_index = merged_operations
            .iter()
--- a/backend/reconcile/src/operation_transformation/ordered_operation.rs
+++ b/backend/reconcile/src/operation_transformation/ordered_operation.rs
@ -12,3 +12,37 @@ where
    pub order: usize,
    pub operation: Operation<T>,
 }
+
+impl<T> OrderedOperation<T>
+where
+    T: PartialEq + Clone + std::fmt::Debug,
+{
+    pub fn get_sort_key(&self) -> (usize, usize, String) {
+        (
+            self.order,
+            self.operation.start_index(),
+            // Make sure that the ordering is deterministic regardless of which text
+            // is left or right.
+            match &self.operation {
+                Operation::Equal { index, .. } => index.to_string(),
+                Operation::Insert { text, .. } => text
+                    .iter()
+                    .map(crate::tokenizer::token::Token::original)
+                    .collect::<String>(),
+                Operation::Delete {
+                    deleted_character_count,
+                    ..
+                } => deleted_character_count.to_string(),
+            },
+        )
+    }
+}
+
+impl<T> PartialOrd for OrderedOperation<T>
+where
+    T: PartialEq + Clone + std::fmt::Debug,
+{
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self.get_sort_key().partial_cmp(&other.get_sort_key())
+    }
+}
--- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations.snap
+++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations.snap
@ -16,19 +16,7 @@ EditedText {
        },
        OrderedOperation {
            order: 12,
-            operation: <equal ' ' from index 17>,
-        },
-        OrderedOperation {
-            order: 13,
-            operation: <equal 'How' from index 18>,
-        },
-        OrderedOperation {
-            order: 16,
-            operation: <equal ' ' from index 21>,
-        },
-        OrderedOperation {
-            order: 17,
-            operation: <equal 'are' from index 22>,
+            operation: <equal ' How are' from index 17>,
        },
        OrderedOperation {
            order: 20,
--- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations_with_no_diff.snap
+++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_texttestscalculate_operations_with_no_diff.snap
@ -8,15 +8,7 @@ EditedText {
    operations: [
        OrderedOperation {
            order: 0,
-            operation: <equal 'hello' from index 0>,
-        },
-        OrderedOperation {
-            order: 5,
-            operation: <equal ' ' from index 5>,
-        },
-        OrderedOperation {
-            order: 6,
-            operation: <equal 'world!' from index 6>,
+            operation: <equal 'hello world!' from index 0>,
        },
    ],
    cursors: [],
--- a/backend/reconcile/src/operation_transformation/utils.rs
+++ b/backend/reconcile/src/operation_transformation/utils.rs
@ -0,0 +1,2 @@
+pub mod cook_operations;
+pub mod elongate_operations;
--- a/backend/reconcile/src/operation_transformation/utils/cook_operations.rs
+++ b/backend/reconcile/src/operation_transformation/utils/cook_operations.rs
@ -0,0 +1,55 @@
+use crate::{
+    diffs::raw_operation::RawOperation,
+    operation_transformation::{Operation, ordered_operation::OrderedOperation},
+};
+
+/// Turn raw operations into ordered operations while keeping track of old & new
+/// indexes.
+pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
+where
+    I: IntoIterator<Item = RawOperation<T>>,
+    T: PartialEq + Clone + std::fmt::Debug,
+{
+    let mut new_index = 0; // this is the start index of the operation on the new text
+    let mut order = 0; // this is the start index of the operation on the original text
+
+    raw_operations.into_iter().filter_map(move |raw_operation| {
+        let length = raw_operation.original_text_length();
+
+        match raw_operation {
+            RawOperation::Equal(..) => {
+                let op = if cfg!(debug_assertions) {
+                    Operation::create_equal_with_text(new_index, raw_operation.get_original_text())
+                } else {
+                    Operation::create_equal(new_index, length)
+                }
+                .map(|operation| OrderedOperation { order, operation });
+
+                new_index += length;
+                order += length;
+
+                op
+            }
+            RawOperation::Insert(tokens) => {
+                let op = Operation::create_insert(new_index, tokens)
+                    .map(|operation| OrderedOperation { order, operation });
+
+                new_index += length;
+
+                op
+            }
+            RawOperation::Delete(..) => {
+                let op = if cfg!(debug_assertions) {
+                    Operation::create_delete_with_text(new_index, raw_operation.get_original_text())
+                } else {
+                    Operation::create_delete(new_index, length)
+                }
+                .map(|operation| OrderedOperation { order, operation });
+
+                order += length;
+
+                op
+            }
+        }
+    })
+}
--- a/backend/reconcile/src/operation_transformation/utils/elongate_operations.rs
+++ b/backend/reconcile/src/operation_transformation/utils/elongate_operations.rs
@ -0,0 +1,154 @@
+use core::iter;
+
+use crate::diffs::raw_operation::RawOperation;
+
+/// Elongates the operations by merging adjacent insertions, deletions and
+/// equals that can be joined. This makes the subsequent merging of operations
+/// more intuitive.
+pub fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
+where
+    I: IntoIterator<Item = RawOperation<T>>,
+    T: PartialEq + Clone + std::fmt::Debug,
+{
+    // This might look bad, but this makes sense. The inserts and deletes can be
+    // interleaved, such as: IDIDID and we need to turn this into IIIDDD.
+    // So we need to keep track of both the last insert and delete operations, not
+    // just the last one.
+    let mut maybe_previous_insert: Option<RawOperation<T>> = None;
+    let mut maybe_previous_delete: Option<RawOperation<T>> = None;
+
+    // Equals can't be interleaved with inserts and deletes
+    let mut maybe_previous_equal: Option<RawOperation<T>> = None;
+
+    let mut result: Vec<RawOperation<T>> = raw_operations
+        .into_iter()
+        .flat_map(|next| match next {
+            RawOperation::Insert(..) => match maybe_previous_insert.take() {
+                Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
+                    maybe_previous_insert = Some(prev.extend(next));
+                    Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+                prev => {
+                    maybe_previous_insert = Some(next);
+                    Box::new(
+                        maybe_previous_equal
+                            .take()
+                            .into_iter()
+                            .chain(prev.into_iter()),
+                    ) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+            },
+            RawOperation::Delete(..) => match maybe_previous_delete.take() {
+                Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
+                    maybe_previous_delete = Some(prev.extend(next));
+                    Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+                prev => {
+                    maybe_previous_delete = Some(next);
+                    Box::new(
+                        maybe_previous_equal
+                            .take()
+                            .into_iter()
+                            .chain(prev.into_iter()),
+                    ) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+            },
+            RawOperation::Equal(..) => match maybe_previous_equal.take() {
+                Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
+                    maybe_previous_equal = Some(prev.extend(next));
+                    Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+                prev => {
+                    maybe_previous_equal = Some(next);
+                    Box::new(
+                        maybe_previous_insert
+                            .take()
+                            .into_iter()
+                            .chain(maybe_previous_delete.take())
+                            .chain(prev.into_iter()),
+                    ) as Box<dyn Iterator<Item = RawOperation<T>>>
+                }
+            },
+        })
+        .collect();
+
+    if let Some(prev) = maybe_previous_insert {
+        result.push(prev);
+    }
+
+    if let Some(prev) = maybe_previous_delete {
+        result.push(prev);
+    }
+
+    if let Some(prev) = maybe_previous_equal {
+        result.push(prev);
+    }
+
+    result
+}
+
+#[cfg(test)]
+
+mod tests {
+
+    use super::*;
+
+    #[test]
+    fn test_elongate_operations_empty() {
+        let operations: Vec<RawOperation<()>> = vec![];
+        let result = elongate_operations(operations);
+        assert_eq!(result, vec![]);
+    }
+
+    #[test]
+    fn test_elongate_operations_single_operation() {
+        let operations = vec![RawOperation::Insert(vec!["test".into()])];
+        let result = elongate_operations(operations);
+        assert_eq!(result.len(), 1);
+        assert!(matches!(result[0], RawOperation::Insert(_)));
+    }
+
+    #[test]
+    fn test_elongate_operations_interleaved() {
+        let operations = vec![
+            RawOperation::Insert(vec!["a".into()]),
+            RawOperation::Delete(vec!["b".into()]),
+            RawOperation::Insert(vec!["c".into()]),
+            RawOperation::Delete(vec!["d".into()]),
+        ];
+        let result = elongate_operations(operations);
+        assert_eq!(result.len(), 2);
+        assert!(matches!(result[0], RawOperation::Insert(_)));
+        assert!(matches!(result[1], RawOperation::Delete(_)));
+    }
+
+    #[test]
+    fn test_elongate_operations_with_equal() {
+        let operations = vec![
+            RawOperation::Equal(vec!["a".into()]),
+            RawOperation::Equal(vec!["b".into()]),
+            RawOperation::Insert(vec!["c".into()]),
+            RawOperation::Insert(vec!["d".into()]),
+        ];
+        let result = elongate_operations(operations);
+        assert_eq!(result.len(), 2);
+        assert!(matches!(result[0], RawOperation::Equal(_)));
+        assert!(matches!(result[1], RawOperation::Insert(_)));
+    }
+
+    #[test]
+    fn test_elongate_operations_mixed_sequence() {
+        let operations = vec![
+            RawOperation::Insert(vec!["a".into()]),
+            RawOperation::Equal(vec!["b".into()]),
+            RawOperation::Delete(vec!["c".into()]),
+            RawOperation::Equal(vec!["d".into()]),
+        ];
+        let result = elongate_operations(operations);
+        assert_eq!(result.len(), 4);
+        assert!(matches!(result[0], RawOperation::Insert(_)));
+        assert!(matches!(result[1], RawOperation::Equal(_)));
+        assert!(matches!(result[2], RawOperation::Delete(_)));
+        assert!(matches!(result[3], RawOperation::Equal(_)));
+    }
+}
--- a/backend/reconcile/src/tokenizer.rs
+++ b/backend/reconcile/src/tokenizer.rs
@ -3,4 +3,5 @@ use token::Token;
 pub mod token;
 pub mod word_tokenizer;

+/// Type alias for tokenizer functions that take a string and return a list of tokens.
 pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
--- a/backend/reconcile/src/tokenizer/token.rs
+++ b/backend/reconcile/src/tokenizer/token.rs
@ -2,8 +2,12 @@
 use serde::{Deserialize, Serialize};

 /// A token is a string that has been normalised in some way.
-/// The normalised form is used for comparison, while the original form is used
-/// for applying `Operation`-s.
+///
+/// A token consists of the normalised form, which is used for comparison, and
+/// the original form, which is used for subsequently applying `Operation`-s to
+/// a text document.
+///
+/// It's UTF-8 compatible.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct Token<T>
@ -11,18 +15,20 @@ where
    T: PartialEq + Clone + std::fmt::Debug,
 {
    /// The normalised form of the token used deriving the diff.
-    pub normalised: T,
+    normalised: T,

    /// The original string, that should be inserted or deleted in the document.
    original: String,

-    /// Whether the token is joinable with the previous token.
-    is_left_joinable: bool,
+    /// Whether the token is semantically joinable with the previous token.
+    pub is_left_joinable: bool,

-    /// Whether the token is joinable with the next token.
-    is_right_joinable: bool,
+    /// Whether the token is semantically joinable with the next token.
+    pub is_right_joinable: bool,
 }

+/// Trivial implementation of Token when the normalised form is the same as the
+/// original string.
 impl From<&str> for Token<String> {
    fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
 }
@ -47,13 +53,11 @@ where

    pub fn original(&self) -> &str { &self.original }

+    pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
+
    pub fn normalised(&self) -> &T { &self.normalised }

    pub fn get_original_length(&self) -> usize { self.original.chars().count() }
-
-    pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
-
-    pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
 }

 impl<T> PartialEq for Token<T>
--- a/backend/reconcile/src/tokenizer/word_tokenizer.rs
+++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs
@ -1,7 +1,7 @@
 use super::token::Token;

-/// Splits on word boundaries creating alternating words and whitespaces with
-/// the whitesspaces getting unique IDs.
+/// Splits text on word boundaries creating tokens of alternating words and
+/// whitespaces with the whitespaces getting unique IDs.
 ///
 /// ## Example
 ///
@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {

    for i in 0..result.len() - 1 {
        if result[i].original().chars().all(char::is_whitespace) {
-            result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
+            let normalised = result[i].normalised().to_owned() + result[i + 1].original();
+            result[i].set_normalised(normalised);
        }
    }

--- a/backend/reconcile/src/utils.rs
+++ b/backend/reconcile/src/utils.rs
@ -1,6 +1,5 @@
 pub mod common_prefix_len;
 pub mod common_suffix_len;
 pub mod find_longest_prefix_contained_within;
-pub mod merge_iters;
 pub mod side;
 pub mod string_builder;
--- a/backend/reconcile/src/utils/merge_iters.rs
+++ b/backend/reconcile/src/utils/merge_iters.rs
@ -1,86 +0,0 @@
-use core::{cmp::Ordering, iter::Peekable};
-
-pub struct MergeAscending<L, R, F, O>
-where
-    L: Iterator<Item = R::Item>,
-    R: Iterator,
-    F: Fn(&R::Item) -> O,
-    O: PartialOrd,
-{
-    left: Peekable<L>,
-    right: Peekable<R>,
-    get_key: F,
-}
-
-impl<L, R, F, O> MergeAscending<L, R, F, O>
-where
-    L: Iterator<Item = R::Item>,
-    R: Iterator,
-    F: Fn(&R::Item) -> O,
-    O: PartialOrd,
-{
-    fn new(left: L, right: R, get_key: F) -> Self {
-        MergeAscending {
-            left: left.peekable(),
-            right: right.peekable(),
-            get_key,
-        }
-    }
-}
-
-impl<L, R, F, O> Iterator for MergeAscending<L, R, F, O>
-where
-    L: Iterator<Item = R::Item>,
-    R: Iterator,
-    F: Fn(&R::Item) -> O,
-    O: PartialOrd,
-{
-    type Item = L::Item;
-
-    fn next(&mut self) -> Option<L::Item> {
-        let order = match (self.left.peek(), self.right.peek()) {
-            (Some(l), Some(r)) => (self.get_key)(l).partial_cmp(&(self.get_key)(r)),
-            (Some(_), None) => Some(Ordering::Less),
-            (None, Some(_)) => Some(Ordering::Greater),
-            (None, None) => return None,
-        };
-
-        match order {
-            Some(Ordering::Less | Ordering::Equal) | None => self.left.next(),
-            Some(Ordering::Greater) => self.right.next(),
-        }
-    }
-}
-
-pub trait MergeSorted: Iterator {
-    fn merge_sorted_by_key<R, F, O>(self, other: R, get_key: F) -> MergeAscending<Self, R, F, O>
-    where
-        Self: Sized,
-        R: Iterator<Item = Self::Item>,
-        F: Fn(&Self::Item) -> O,
-        O: PartialOrd,
-    {
-        MergeAscending::new(self, other, get_key)
-    }
-}
-
-impl<T: ?Sized> MergeSorted for T where T: Iterator {}
-
-#[cfg(test)]
-mod tests {
-    use pretty_assertions::assert_eq;
-
-    use super::*;
-
-    #[test]
-    fn test_merge_sorted_by_key() {
-        let left = [9, 7, 5, 3, 1];
-        let right = [7, 6, 5, 4, 3];
-
-        let result: Vec<i32> = left
-            .into_iter()
-            .merge_sorted_by_key(right.into_iter(), |x| -1 * x)
-            .collect();
-        assert_eq!(result, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]);
-    }
-}
Author	SHA1	Message	Date
Andras Schmelczer	c6261d9fc1	Elongate equals too	2025-06-15 10:23:14 +01:00
Andras Schmelczer	85454f9f01	Remove clutter	2025-06-14 14:25:36 +01:00
Andras Schmelczer	44697164a0	Use loop instead of iter	2025-06-14 14:24:48 +01:00
Andras Schmelczer	df5079efea	Make OrderedOperation orderable	2025-06-14 12:04:19 +01:00
Andras Schmelczer	46b52b7aff	Hide snapshots	2025-06-14 12:01:34 +01:00
Andras Schmelczer	2dc0ada1fb	Extract utils	2025-06-14 11:44:20 +01:00
Andras Schmelczer	1038f9cee0	Expose word_tokenizer	2025-06-14 11:44:06 +01:00
Andras Schmelczer	744decb92f	Improve API	2025-06-14 11:30:13 +01:00