From bf8d00c5e24a5381cfb8246d8f179d78a7e91ee8 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sun, 2 Mar 2025 17:53:21 +0000 Subject: [PATCH] Fix whitespaces --- .../reconcile/src/operation_transformation.rs | 5 +- .../operation_transformation/edited_text.rs | 5 +- .../src/operation_transformation/operation.rs | 11 +---- backend/reconcile/src/tokenizer/token.rs | 7 +-- .../reconcile/src/tokenizer/word_tokenizer.rs | 47 +++++++++++++++++-- 5 files changed, 53 insertions(+), 22 deletions(-) diff --git a/backend/reconcile/src/operation_transformation.rs b/backend/reconcile/src/operation_transformation.rs index 3f83197a..aa891d72 100644 --- a/backend/reconcile/src/operation_transformation.rs +++ b/backend/reconcile/src/operation_transformation.rs @@ -73,7 +73,8 @@ mod test { "original_1 edit_1 original_3", ); - // One deleted a large range, the other deleted subranges and inserted as well + // One deleted a large range, the other deleted subranges and inserted as + // well test_merge_both_ways( "original_1 original_2 original_3 original_4 original_5", "original_1 original_5", @@ -161,6 +162,8 @@ mod test { "hi there my friend ", "hi there you my friend ", ); + + test_merge_both_ways("a", "a b c", "a b c d", "a b c d"); } #[test_matrix( [ diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index 4052485c..87a5df40 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -65,7 +65,6 @@ where Self::new( original, - // Self::cook_operations(diff), Self::cook_operations(Self::elongate_operations(diff)).collect(), ) } @@ -191,7 +190,7 @@ where pub fn merge(self, other: Self) -> Self { debug_assert_eq!( self.text, other.text, - "EditedText-s must be derived from the same text to be mergable" + "`EditedText`-s must be derived from the same text to be mergable" ); let mut left_merge_context = MergeContext::default(); @@ -285,7 +284,7 @@ mod tests { let original = "hello world! ..."; let left = "Hello world! I'm Andras."; let right = "Hello world! How are you?"; - let expected = "Hello world! How are you?I'm Andras."; + let expected = "Hello world! I'm Andras. How are you?"; let operations_1 = EditedText::from_strings(original, left); let operations_2 = EditedText::from_strings(original, right); diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index a985ad7b..ffc4f7d6 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -107,15 +107,8 @@ where }) } - /// Tries to apply the operation to the given `ropey::Rope` text, returning - /// the modified text. - /// - /// # Errors - /// - /// Returns a `SyncLibError::OperationApplicationError` if the operation - /// cannot be applied. - /// - /// # Panics + /// Applies the operation to the given `StringBuilder`, returning the + /// modified `StringBuilder`. /// /// When compiled in debug mode, panics if a delete operation is attempted /// on a range of text that does not match the text to be deleted. diff --git a/backend/reconcile/src/tokenizer/token.rs b/backend/reconcile/src/tokenizer/token.rs index b867bb20..ab521a71 100644 --- a/backend/reconcile/src/tokenizer/token.rs +++ b/backend/reconcile/src/tokenizer/token.rs @@ -15,12 +15,7 @@ where } impl From<&str> for Token { - fn from(s: &str) -> Self { - Token { - normalised: s.to_owned(), - original: s.to_owned(), - } - } + fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) } } impl Token diff --git a/backend/reconcile/src/tokenizer/word_tokenizer.rs b/backend/reconcile/src/tokenizer/word_tokenizer.rs index 3449cba2..37d748b3 100644 --- a/backend/reconcile/src/tokenizer/word_tokenizer.rs +++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs @@ -1,7 +1,48 @@ use super::token::Token; +/// Splits on whitespace keeping the leading whitespace. +/// +/// +/// ## Example +/// +/// "Hi there!" -> ["Hi", " there!"] pub fn word_tokenizer(text: &str) -> Vec> { - text.split_inclusive(char::is_whitespace) - .map(|s| Token::new(s.to_owned(), s.to_owned())) - .collect() + let mut result: Vec> = Vec::new(); + + let mut last_whitespace = 0; + let mut previous_char_is_whitespace = true; + + for (i, c) in text.char_indices() { + let is_current_char_whitespace = c.is_whitespace(); + if !previous_char_is_whitespace && is_current_char_whitespace { + result.push(text[last_whitespace..i].into()); + last_whitespace = i; + } + + previous_char_is_whitespace = is_current_char_whitespace; + } + + if last_whitespace < text.len() { + result.push(text[last_whitespace..].into()); + } + + result +} + +#[cfg(test)] +mod tests { + use insta::assert_debug_snapshot; + + use super::*; + + #[test] + fn test_with_snapshots() { + assert_debug_snapshot!(word_tokenizer("Hi there!")); + + assert_debug_snapshot!(word_tokenizer("")); + + assert_debug_snapshot!(word_tokenizer(" what? ")); + + assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?")); + } }