Improve API

2025-06-14 11:30:13 +01:00 · 2025-06-14 11:30:13 +01:00 · 744decb92f
commit 744decb92f
parent 433e8f390f
4 changed files with 23 additions and 18 deletions
--- a/backend/reconcile/src/diffs/myers.rs
+++ b/backend/reconcile/src/diffs/myers.rs
@ -30,7 +30,7 @@ use crate::{
    utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
 };

-/// Myers' diff algorithm with deadline.
+/// Myers' diff algorithm.
 ///
 /// Diff `old`, between indices `old_range` and `new` between indices
 /// `new_range`.
--- a/backend/reconcile/src/diffs/raw_operation.rs
+++ b/backend/reconcile/src/diffs/raw_operation.rs
@ -30,12 +30,12 @@ where

    pub fn is_left_joinable(&self) -> bool {
        let first_token = self.tokens().first();
-        first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
+        first_token.is_none_or(|token| token.is_left_joinable)
    }

    pub fn is_right_joinable(&self) -> bool {
        let last_token = self.tokens().last();
-        last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
+        last_token.is_none_or(|token| token.is_right_joinable)
    }

    /// Extends the operation with another operation. Only operations of the
@ -49,8 +49,8 @@ where
        );

        match (self, other) {
-            (RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
-                RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
+            (RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
+                RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
            }
            (RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
                RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
--- a/backend/reconcile/src/tokenizer/token.rs
+++ b/backend/reconcile/src/tokenizer/token.rs
@ -2,8 +2,12 @@
 use serde::{Deserialize, Serialize};

 /// A token is a string that has been normalised in some way.
-/// The normalised form is used for comparison, while the original form is used
-/// for applying `Operation`-s.
+///
+/// It's UTF-8 compatible.
+///
+/// A token consists of the normalised form, which is used for comparison, and
+/// the original form, which is used for subsequently applying `Operation`-s to
+/// a text document.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct Token<T>
@ -11,18 +15,20 @@ where
    T: PartialEq + Clone + std::fmt::Debug,
 {
    /// The normalised form of the token used deriving the diff.
-    pub normalised: T,
+    normalised: T,

    /// The original string, that should be inserted or deleted in the document.
    original: String,

-    /// Whether the token is joinable with the previous token.
-    is_left_joinable: bool,
+    /// Whether the token is semantically joinable with the previous token.
+    pub is_left_joinable: bool,

-    /// Whether the token is joinable with the next token.
-    is_right_joinable: bool,
+    /// Whether the token is semantically joinable with the next token.
+    pub is_right_joinable: bool,
 }

+/// Trivial conversion into a `Token` where the normalised form is the same as
+/// the original string.
 impl From<&str> for Token<String> {
    fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
 }
@ -47,13 +53,11 @@ where

    pub fn original(&self) -> &str { &self.original }

+    pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
+
    pub fn normalised(&self) -> &T { &self.normalised }

    pub fn get_original_length(&self) -> usize { self.original.chars().count() }
-
-    pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
-
-    pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
 }

 impl<T> PartialEq for Token<T>
--- a/backend/reconcile/src/tokenizer/word_tokenizer.rs
+++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs
@ -1,7 +1,7 @@
 use super::token::Token;

 /// Splits on word boundaries creating alternating words and whitespaces with
-/// the whitesspaces getting unique IDs.
+/// the whitespaces getting unique IDs.
 ///
 /// ## Example
 ///
@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {

    for i in 0..result.len() - 1 {
        if result[i].original().chars().all(char::is_whitespace) {
-            result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
+            let normalised = result[i].normalised().to_owned() + result[i + 1].original();
+            result[i].set_normalised(normalised);
        }
    }