diff --git a/Cargo.toml b/Cargo.toml index 4211aa5..022b9d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ console_error_panic_hook = [ "dep:console_error_panic_hook" ] insta = "1.42.2" pretty_assertions = "1.4.1" serde = { version = "1.0.219", features = ["derive"] } -serde_yaml ="0.9.34" +serde_yaml = "0.9.34" test-case = "3.3.1" wasm-bindgen-test = "0.3.49" @@ -42,7 +42,7 @@ wasm-bindgen-test = "0.3.49" codegen-units = 1 lto = true opt-level = 3 -strip="symbols" +strip = "symbols" [package.metadata.wasm-pack.profile.release] wasm-opt = ['-O4', '--enable-bulk-memory'] @@ -84,7 +84,7 @@ large_stack_arrays = { level = "allow", priority = 1 } # https://github.com/rust # Silly lints implicit_return = { level = "allow", priority = 1 } -question_mark_used = { level = "allow", priority = 1 } +question_mark_used = { level = "allow", priority = 1 } struct_field_names = { level = "allow", priority = 1 } single_char_lifetime_names = { level = "allow", priority = 1 } single_call_fn = { level = "allow", priority = 1 } diff --git a/src/lib.rs b/src/lib.rs index 71d837f..c9ef87c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,8 +54,8 @@ //! .map(|sentence| Token::new( //! sentence.to_string(), //! sentence.to_string(), -//! false, // don't allow joining token with the preceeding on -//! false // don't allow joining token with the following one +//! false, // don't allow joining token with the preceding one +//! false, // don't allow joining token with the following one //! )) //! .collect::>() //! }; @@ -68,7 +68,7 @@ //! let result = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word); //! assert_eq!(result.apply().text(), "Hello beautiful world. This is a great test."); //! ``` -//! > By setting the joinability to `false`, longer runs of inserts with be +//! > By setting the joinability to `false`, longer runs of inserts will be //! > interleaved like LRLRLR instead of LLLRRR. //! //! 
## Cursors and selection ranges diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs index f2926af..58e6ab6 100644 --- a/src/tokenizer/token.rs +++ b/src/tokenizer/token.rs @@ -3,9 +3,9 @@ use std::fmt::Debug; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -/// A token is a string that has been normalised in some way. +/// A token is a string that has been normalized in some way. /// -/// A token consists of the normalised form is used for comparison, and the +/// A token consists of the normalized form used for comparison, and the /// original form used for subsequently applying `Operation`-s to a text /// document. /// @@ -16,8 +16,8 @@ pub struct Token where T: PartialEq + Clone + Debug, { - /// The normalised form of the token used deriving the diff. - normalised: T, + /// The normalized form of the token used for deriving the diff. + normalized: T, /// The original string, that should be inserted or deleted in the document. original: String, @@ -29,7 +29,7 @@ where pub is_right_joinable: bool, } -/// Trivial implementation of Token when the normalised form is the same as the +/// Trivial implementation of Token when the normalized form is the same as the /// original string. 
impl From<&str> for Token { fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) } @@ -40,13 +40,13 @@ where T: PartialEq + Clone + Debug, { pub fn new( - normalised: T, + normalized: T, original: String, is_left_joinable: bool, is_right_joinable: bool, ) -> Self { Token { - normalised, + normalized, original, is_left_joinable, is_right_joinable, @@ -55,9 +55,9 @@ where pub fn original(&self) -> &str { &self.original } - pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; } + pub fn set_normalized(&mut self, normalized: T) { self.normalized = normalized; } - pub fn normalised(&self) -> &T { &self.normalised } + pub fn normalized(&self) -> &T { &self.normalized } pub fn get_original_length(&self) -> usize { self.original.chars().count() } } @@ -66,5 +66,5 @@ impl PartialEq for Token where T: PartialEq + Clone + Debug, { - fn eq(&self, other: &Self) -> bool { self.normalised == other.normalised } + fn eq(&self, other: &Self) -> bool { self.normalized == other.normalized } } diff --git a/src/tokenizer/word_tokenizer.rs b/src/tokenizer/word_tokenizer.rs index 61c3fa3..bbb570e 100644 --- a/src/tokenizer/word_tokenizer.rs +++ b/src/tokenizer/word_tokenizer.rs @@ -1,7 +1,7 @@ use super::token::Token; -/// Splits text on word boundaries creating tokens of alternating words and -/// whitespaces with the whitespaces getting unique IDs. +/// Splits text on word boundaries, creating tokens of alternating words and +/// whitespace with the whitespace getting unique IDs. /// /// ## Example /// @@ -9,7 +9,7 @@ use super::token::Token; /// "Hi there!" 
-> ["Hi", " ", "there!"] /// ``` pub fn word_tokenizer(text: &str) -> Vec> { - let mut result: Vec> = Vec::new(); + let mut result = Vec::new(); let mut previous_boundary_index = 0; let mut previous_char_is_whitespace = text.chars().next().is_none_or(char::is_whitespace); @@ -32,10 +32,11 @@ pub fn word_tokenizer(text: &str) -> Vec> { return result; } + // normalize whitespace tokens by concatenating with the following token for i in 0..result.len() - 1 { if result[i].original().chars().all(char::is_whitespace) { - let normalised = result[i].normalised().to_owned() + result[i + 1].original(); - result[i].set_normalised(normalised); + let normalized = result[i].normalized().to_owned() + result[i + 1].original(); + result[i].set_normalized(normalized); } } diff --git a/src/utils/myers_diff.rs b/src/utils/myers_diff.rs index c4c64a9..705a7de 100644 --- a/src/utils/myers_diff.rs +++ b/src/utils/myers_diff.rs @@ -44,7 +44,7 @@ where let max_d = (old.len() + new.len()).div_ceil(2) + 1; let mut vb = V::new(max_d); let mut vf = V::new(max_d); - let mut result: Vec> = vec![]; + let mut result = Vec::new(); conquer( old,