diff --git a/backend/reconcile/src/diffs/myers.rs b/backend/reconcile/src/diffs/myers.rs index c0be1d7..501eca8 100644 --- a/backend/reconcile/src/diffs/myers.rs +++ b/backend/reconcile/src/diffs/myers.rs @@ -30,7 +30,7 @@ use crate::{ utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len}, }; -/// Myers' diff algorithm with deadline. +/// Myers' diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices /// `new_range`. diff --git a/backend/reconcile/src/diffs/raw_operation.rs b/backend/reconcile/src/diffs/raw_operation.rs index 7630ff7..2e4a0a7 100644 --- a/backend/reconcile/src/diffs/raw_operation.rs +++ b/backend/reconcile/src/diffs/raw_operation.rs @@ -30,12 +30,12 @@ where pub fn is_left_joinable(&self) -> bool { let first_token = self.tokens().first(); - first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable) + first_token.is_none_or(|token| token.is_left_joinable) } pub fn is_right_joinable(&self) -> bool { let last_token = self.tokens().last(); - last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable) + last_token.is_none_or(|token| token.is_right_joinable) } /// Extends the operation with another operation. 
Only operations of the @@ -49,8 +49,8 @@ where ); match (self, other) { - (RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => { - RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect()) + (RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => { + RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect()) } (RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => { RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect()) diff --git a/backend/reconcile/src/tokenizer/token.rs b/backend/reconcile/src/tokenizer/token.rs index 86cbb92..23504e7 100644 --- a/backend/reconcile/src/tokenizer/token.rs +++ b/backend/reconcile/src/tokenizer/token.rs @@ -2,8 +2,12 @@ use serde::{Deserialize, Serialize}; /// A token is a string that has been normalised in some way. -/// The normalised form is used for comparison, while the original form is used -/// for applying `Operation`-s. +/// +/// It's UTF-8 compatible. +/// +/// A token consists of the normalised form, which is used for comparison, and +/// the original form, which is used for subsequently applying `Operation`-s to +/// a text document. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct Token @@ -11,18 +15,20 @@ where T: PartialEq + Clone + std::fmt::Debug, { /// The normalised form of the token used deriving the diff. - pub normalised: T, + normalised: T, /// The original string, that should be inserted or deleted in the document. original: String, - /// Whether the token is joinable with the previous token. - is_left_joinable: bool, + /// Whether the token is semantically joinable with the previous token. + pub is_left_joinable: bool, - /// Whether the token is joinable with the next token. - is_right_joinable: bool, + /// Whether the token is semantically joinable with the next token. 
+ pub is_right_joinable: bool, } +/// Trivial implementation of Token when the normalised form is the same as the +/// original string. impl From<&str> for Token { fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) } } @@ -47,13 +53,11 @@ where pub fn original(&self) -> &str { &self.original } + pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; } + pub fn normalised(&self) -> &T { &self.normalised } pub fn get_original_length(&self) -> usize { self.original.chars().count() } - - pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable } - - pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable } } impl PartialEq for Token diff --git a/backend/reconcile/src/tokenizer/word_tokenizer.rs b/backend/reconcile/src/tokenizer/word_tokenizer.rs index 2267f69..46faa42 100644 --- a/backend/reconcile/src/tokenizer/word_tokenizer.rs +++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs @@ -1,7 +1,7 @@ use super::token::Token; /// Splits on word boundaries creating alternating words and whitespaces with -/// the whitesspaces getting unique IDs. +/// the whitespaces getting unique IDs. /// /// ## Example /// @@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec> { for i in 0..result.len() - 1 { if result[i].original().chars().all(char::is_whitespace) { - result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original(); + let normalised = result[i].normalised().to_owned() + result[i + 1].original(); + result[i].set_normalised(normalised); } }