Improve API
This commit is contained in:
parent
433e8f390f
commit
744decb92f
4 changed files with 23 additions and 18 deletions
|
|
@ -30,7 +30,7 @@ use crate::{
|
|||
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
|
||||
};
|
||||
|
||||
/// Myers' diff algorithm with deadline.
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices
|
||||
/// `new_range`.
|
||||
|
|
|
|||
|
|
@ -30,12 +30,12 @@ where
|
|||
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
|
||||
first_token.is_none_or(|token| token.is_left_joinable)
|
||||
}
|
||||
|
||||
pub fn is_right_joinable(&self) -> bool {
|
||||
let last_token = self.tokens().last();
|
||||
last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
|
||||
last_token.is_none_or(|token| token.is_right_joinable)
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation. Only operations of the
|
||||
|
|
@ -49,8 +49,8 @@ where
|
|||
);
|
||||
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
|
||||
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
|
||||
(RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
|
||||
RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
|
||||
}
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
|
||||
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A token is a string that has been normalised in some way.
|
||||
/// The normalised form is used for comparison, while the original form is used
|
||||
/// for applying `Operation`-s.
|
||||
///
|
||||
/// It's UTF-8 compatible.
|
||||
///
|
||||
/// A token consists of the normalised form used for comparison, and the
|
||||
/// original form used for subsequently applying `Operation`-s to a text
|
||||
/// document.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
|
|
@ -11,18 +15,20 @@ where
|
|||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// The normalised form of the token, used for deriving the diff.
|
||||
pub normalised: T,
|
||||
normalised: T,
|
||||
|
||||
/// The original string that should be inserted or deleted in the document.
|
||||
original: String,
|
||||
|
||||
/// Whether the token is joinable with the previous token.
|
||||
is_left_joinable: bool,
|
||||
/// Whether the token is semantically joinable with the previous token.
|
||||
pub is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is joinable with the next token.
|
||||
is_right_joinable: bool,
|
||||
/// Whether the token is semantically joinable with the next token.
|
||||
pub is_right_joinable: bool,
|
||||
}
|
||||
|
||||
/// Trivial implementation of Token when the normalised form is the same as the
|
||||
/// original string.
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
}
|
||||
|
|
@ -47,13 +53,11 @@ where
|
|||
|
||||
pub fn original(&self) -> &str { &self.original }
|
||||
|
||||
pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
|
||||
|
||||
pub fn normalised(&self) -> &T { &self.normalised }
|
||||
|
||||
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
|
||||
pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
|
||||
|
||||
pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits on word boundaries creating alternating words and whitespaces with
|
||||
/// the whitesspaces getting unique IDs.
|
||||
/// the whitespaces getting unique IDs.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
|
|
@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
|||
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
let normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalised(normalised);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue