Improve API

This commit is contained in:
Andras Schmelczer 2025-06-14 11:30:13 +01:00
parent 433e8f390f
commit 744decb92f
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
4 changed files with 23 additions and 18 deletions

View file

@ -30,7 +30,7 @@ use crate::{
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
};
/// Myers' diff algorithm with deadline.
/// Myers' diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices
/// `new_range`.

View file

@ -30,12 +30,12 @@ where
pub fn is_left_joinable(&self) -> bool {
let first_token = self.tokens().first();
first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
first_token.is_none_or(|token| token.is_left_joinable)
}
pub fn is_right_joinable(&self) -> bool {
let last_token = self.tokens().last();
last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
last_token.is_none_or(|token| token.is_right_joinable)
}
/// Extends the operation with another operation. Only operations of the
@ -49,8 +49,8 @@ where
);
match (self, other) {
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
(RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
}
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())

View file

@ -2,8 +2,12 @@
use serde::{Deserialize, Serialize};
/// A token is a string that has been normalised in some way.
/// The normalised form is used for comparison, while the original form is used
/// for applying `Operation`-s.
///
/// It's UTF-8 compatible.
///
/// A token consists of the normalised form, which is used for comparison, and
/// the original form, which is used for subsequently applying `Operation`-s to
/// a text document.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Token<T>
@ -11,18 +15,20 @@ where
T: PartialEq + Clone + std::fmt::Debug,
{
/// The normalised form of the token used for deriving the diff.
pub normalised: T,
normalised: T,
/// The original string that should be inserted or deleted in the document.
original: String,
/// Whether the token is joinable with the previous token.
is_left_joinable: bool,
/// Whether the token is semantically joinable with the previous token.
pub is_left_joinable: bool,
/// Whether the token is joinable with the next token.
is_right_joinable: bool,
/// Whether the token is semantically joinable with the next token.
pub is_right_joinable: bool,
}
/// Trivial conversion from a string slice into a `Token<String>`, for when the
/// normalised form is the same as the original string.
///
/// The text is copied twice (once per string argument of `Token::new`), and
/// both boolean arguments are `true`.
/// NOTE(review): presumably these are `is_left_joinable` and
/// `is_right_joinable`, matching the struct's field order — confirm against
/// `Token::new`.
impl From<&str> for Token<String> {
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
}
@ -47,13 +53,11 @@ where
pub fn original(&self) -> &str { &self.original }
pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
pub fn normalised(&self) -> &T { &self.normalised }
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
}
impl<T> PartialEq for Token<T>

View file

@ -1,7 +1,7 @@
use super::token::Token;
/// Splits on word boundaries creating alternating words and whitespaces with
/// the whitesspaces getting unique IDs.
/// the whitespaces getting unique IDs.
///
/// ## Example
///
@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
for i in 0..result.len() - 1 {
if result[i].original().chars().all(char::is_whitespace) {
result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
let normalised = result[i].normalised().to_owned() + result[i + 1].original();
result[i].set_normalised(normalised);
}
}