diff --git a/.vscode/settings.json b/.vscode/settings.json index ce20ced2..e6c9453c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,5 +5,6 @@ "**/dist": true, "**/node_modules": true, "**/.sqlx": true, + "**/snapshots": true, } } diff --git a/backend/reconcile/src/diffs/myers.rs b/backend/reconcile/src/diffs/myers.rs index c0be1d78..501eca80 100644 --- a/backend/reconcile/src/diffs/myers.rs +++ b/backend/reconcile/src/diffs/myers.rs @@ -30,7 +30,7 @@ use crate::{ utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len}, }; -/// Myers' diff algorithm with deadline. +/// Myers' diff algorithm. /// /// Diff `old`, between indices `old_range` and `new` between indices /// `new_range`. diff --git a/backend/reconcile/src/diffs/raw_operation.rs b/backend/reconcile/src/diffs/raw_operation.rs index 7630ff7f..2e4a0a70 100644 --- a/backend/reconcile/src/diffs/raw_operation.rs +++ b/backend/reconcile/src/diffs/raw_operation.rs @@ -30,12 +30,12 @@ where pub fn is_left_joinable(&self) -> bool { let first_token = self.tokens().first(); - first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable) + first_token.is_none_or(|token| token.is_left_joinable) } pub fn is_right_joinable(&self) -> bool { let last_token = self.tokens().last(); - last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable) + last_token.is_none_or(|token| token.is_right_joinable) } /// Extends the operation with another operation. 
Only operations of the @@ -49,8 +49,8 @@ where ); match (self, other) { - (RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => { - RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect()) + (RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => { + RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect()) } (RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => { RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect()) diff --git a/backend/reconcile/src/lib.rs b/backend/reconcile/src/lib.rs index a04ae853..c621ffb2 100644 --- a/backend/reconcile/src/lib.rs +++ b/backend/reconcile/src/lib.rs @@ -7,4 +7,4 @@ pub use operation_transformation::{ CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors, reconcile_with_tokenizer, }; -pub use tokenizer::{Tokenizer, token::Token}; +pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer}; diff --git a/backend/reconcile/src/operation_transformation.rs b/backend/reconcile/src/operation_transformation.rs index 08a55a94..fc280ec1 100644 --- a/backend/reconcile/src/operation_transformation.rs +++ b/backend/reconcile/src/operation_transformation.rs @@ -3,6 +3,7 @@ mod edited_text; mod merge_context; mod operation; mod ordered_operation; +mod utils; pub use cursor::{CursorPosition, TextWithCursors}; pub use edited_text::EditedText; diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index b83441f6..bd9a15d4 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -1,23 +1,29 @@ -use core::iter; - #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation}; use crate::{ diffs::{myers::diff, raw_operation::RawOperation}, - 
operation_transformation::merge_context::MergeContext, + operation_transformation::{ + merge_context::MergeContext, + utils::{cook_operations::cook_operations, elongate_operations::elongate_operations}, + }, tokenizer::{Tokenizer, word_tokenizer::word_tokenizer}, - utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder}, + utils::{side::Side, string_builder::StringBuilder}, }; -/// A sequence of operations that can be applied to a text document. -/// `EditedText` supports merging two sequences of operations using the -/// principle of Operational Transformation. +/// A text document and a sequence of operations that can be applied to the text +/// document. `EditedText` supports merging two sequences of operations using +/// the principles of Operational Transformation. /// /// It's mainly created through the `from_strings` method, then merged with /// another `EditedText` derived from the same original text and then applied to /// the original text to get the reconciled text of concurrent edits. +/// +/// In addition to text and operations, it also keeps track of cursor positions +/// in the original text. The cursor positions are updated when the operations +/// are applied, so that the cursor positions can be used to restore the +/// cursor positions in the updated text. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Default)] pub struct EditedText<'a, T> @@ -63,123 +69,11 @@ where Self::new( original, - Self::cook_operations(Self::elongate_operations(diff)).collect(), + cook_operations(elongate_operations(diff)).collect(), updated.cursors, ) } - fn elongate_operations(raw_operations: I) -> Vec> - where - I: IntoIterator>, - { - // This might look bad, but this makes sense. The inserts and deltes can be - // interleaved, such as: IDIDID and we need to turn this into IIIDDD. - // So we need to keep track of both the last insert and delete operations, not - // just the last one. 
- let mut maybe_previous_insert: Option> = None; - let mut maybe_previous_delete: Option> = None; - - let mut result: Vec> = raw_operations - .into_iter() - .flat_map(|next| match next { - RawOperation::Insert(..) => match maybe_previous_insert.take() { - Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { - maybe_previous_insert = Some(prev.extend(next)); - Box::new(iter::empty()) as Box>> - } - prev => { - maybe_previous_insert = Some(next); - Box::new(prev.into_iter()) - } - }, - RawOperation::Delete(..) => match maybe_previous_delete.take() { - Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { - maybe_previous_delete = Some(prev.extend(next)); - Box::new(iter::empty()) as Box>> - } - prev => { - maybe_previous_delete = Some(next); - Box::new(prev.into_iter()) - } - }, - RawOperation::Equal(..) => Box::new( - maybe_previous_insert - .take() - .into_iter() - .chain(maybe_previous_delete.take()) - .chain(iter::once(next)), - ) - as Box>>, - }) - .collect(); - - if let Some(prev) = maybe_previous_insert { - result.push(prev); - } - - if let Some(prev) = maybe_previous_delete { - result.push(prev); - } - - result - } - - // Turn raw operations into ordered operations while keeping track of old & new - // indexes. - fn cook_operations(raw_operations: I) -> impl Iterator> - where - I: IntoIterator>, - { - let mut new_index = 0; // this is the start index of the operation on the new text - let mut order = 0; // this is the start index of the operation on the original text - - raw_operations.into_iter().filter_map(move |raw_operation| { - let length = raw_operation.original_text_length(); - - match raw_operation { - RawOperation::Equal(..) 
=> { - let op = if cfg!(debug_assertions) { - Operation::create_equal_with_text( - new_index, - raw_operation.get_original_text(), - ) - } else { - Operation::create_equal(new_index, length) - } - .map(|operation| OrderedOperation { order, operation }); - - new_index += length; - order += length; - - op - } - RawOperation::Insert(tokens) => { - let op = Operation::create_insert(new_index, tokens) - .map(|operation| OrderedOperation { order, operation }); - - new_index += length; - - op - } - RawOperation::Delete(..) => { - let op = if cfg!(debug_assertions) { - Operation::create_delete_with_text( - new_index, - raw_operation.get_original_text(), - ) - } else { - Operation::create_delete(new_index, length) - } - .map(|operation| OrderedOperation { order, operation }); - - order += length; - - op - } - } - }) - } - /// Create a new `EditedText` with the given operations. /// The operations must be in the order in which they are meant to be /// applied. The operations must not overlap. @@ -223,82 +117,84 @@ where let mut left_cursors = self.cursors.into_iter().peekable(); let mut right_cursors = other.cursors.into_iter().peekable(); - let merged_operations: Vec> = self - .operations - .into_iter() - // The current text is always the left; the other operation is the right side. - .map(|op| (op, Side::Left)) - .merge_sorted_by_key( - other.operations.into_iter().map(|op| (op, Side::Right)), - |(operation, _)| { - ( - operation.order, - operation.operation.start_index(), - // Make sure that the ordering is deterministic regardless which text - // is left or right. - match &operation.operation { - Operation::Equal { index, .. } => index.to_string(), - Operation::Insert { text, .. } => text - .iter() - .map(crate::tokenizer::token::Token::original) - .collect::(), - Operation::Delete { - deleted_character_count, - .. 
- } => deleted_character_count.to_string(), - }, - ) - }, - ) - .flat_map(|(OrderedOperation { order, operation }, side)| { - let original_start = operation.start_index() as i64; - let original_end = operation.end_index(); - let original_length = operation.len() as i64; + let mut merged_operations: Vec> = + Vec::with_capacity(self.operations.len() + other.operations.len()); - let result = match side { - Side::Left => operation.merge_operations_with_context( - &mut right_merge_context, - &mut left_merge_context, - ), - Side::Right => operation.merge_operations_with_context( - &mut left_merge_context, - &mut right_merge_context, - ), + let mut left_iter = self.operations.into_iter(); + let mut right_iter = other.operations.into_iter(); + + let mut maybe_left_op = left_iter.next(); + let mut maybe_right_op = right_iter.next(); + + loop { + let (side, OrderedOperation { operation, order }) = + match (maybe_left_op.clone(), maybe_right_op.clone()) { + (Some(left_op), Some(right_op)) => { + if left_op < right_op { + (Side::Left, left_op) + } else { + (Side::Right, right_op) + } + } + + (Some(left_op), None) => (Side::Left, left_op), + (None, Some(right_op)) => (Side::Right, right_op), + (None, None) => break, }; - if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. 
})) = result - { - let shift = op.start_index() as i64 - original_start + op.len() as i64 - - original_length; - match side { - Side::Left => { - while let Some(cursor) = - left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) - { - merged_cursors.push(cursor.with_index( - (op.start_index() as i64).max(cursor.char_index as i64 + shift) - as usize, - )); - } + if side == Side::Left { + maybe_left_op = left_iter.next(); + } else { + maybe_right_op = right_iter.next(); + } + + let original_start = operation.start_index() as i64; + let original_end = operation.end_index(); + let original_length = operation.len() as i64; + + let result = match side { + Side::Left => operation.merge_operations_with_context( + &mut right_merge_context, + &mut left_merge_context, + ), + Side::Right => operation.merge_operations_with_context( + &mut left_merge_context, + &mut right_merge_context, + ), + }; + + if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result { + let shift = + op.start_index() as i64 - original_start + op.len() as i64 - original_length; + match side { + Side::Left => { + while let Some(cursor) = + left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) + { + merged_cursors.push(cursor.with_index( + (op.start_index() as i64).max(cursor.char_index as i64 + shift) + as usize, + )); } - Side::Right => { - while let Some(cursor) = right_cursors - .next_if(|cursor| cursor.char_index <= original_end + 1) - { - merged_cursors.push(cursor.with_index( - (op.start_index() as i64).max(cursor.char_index as i64 + shift) - as usize, - )); - } + } + Side::Right => { + while let Some(cursor) = + right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1) + { + merged_cursors.push(cursor.with_index( + (op.start_index() as i64).max(cursor.char_index as i64 + shift) + as usize, + )); } } } + } - result - .map(|operation| OrderedOperation { order, operation }) - .into_iter() - }) - .collect(); + 
merged_operations.extend(result.into_iter().map(|op| OrderedOperation { + order, + operation: op, + })); + } let last_index = merged_operations .iter() diff --git a/backend/reconcile/src/operation_transformation/ordered_operation.rs b/backend/reconcile/src/operation_transformation/ordered_operation.rs index 116b6372..6a668e2c 100644 --- a/backend/reconcile/src/operation_transformation/ordered_operation.rs +++ b/backend/reconcile/src/operation_transformation/ordered_operation.rs @@ -12,3 +12,37 @@ where pub order: usize, pub operation: Operation, } + +impl OrderedOperation +where + T: PartialEq + Clone + std::fmt::Debug, +{ + pub fn get_sort_key(&self) -> (usize, usize, String) { + ( + self.order, + self.operation.start_index(), + // Make sure that the ordering is deterministic regardless of which text + // is left or right. + match &self.operation { + Operation::Equal { index, .. } => index.to_string(), + Operation::Insert { text, .. } => text + .iter() + .map(crate::tokenizer::token::Token::original) + .collect::(), + Operation::Delete { + deleted_character_count, + .. 
+ } => deleted_character_count.to_string(), + }, + ) + } +} + +impl PartialOrd for OrderedOperation +where + T: PartialEq + Clone + std::fmt::Debug, +{ + fn partial_cmp(&self, other: &Self) -> Option { + self.get_sort_key().partial_cmp(&other.get_sort_key()) + } +} diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap index 246b2fe0..995db9c8 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations.snap @@ -16,19 +16,7 @@ EditedText { }, OrderedOperation { order: 12, - operation: , - }, - OrderedOperation { - order: 13, - operation: , - }, - OrderedOperation { - order: 16, - operation: , - }, - OrderedOperation { - order: 17, - operation: , + operation: , }, OrderedOperation { order: 20, diff --git a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap index 33414f8c..7639dbcc 100644 --- a/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap +++ b/backend/reconcile/src/operation_transformation/snapshots/reconcile__operation_transformation__edited_text__tests__calculate_operations_with_no_diff.snap @@ -8,15 +8,7 @@ EditedText { operations: [ OrderedOperation { order: 0, - operation: , - }, - OrderedOperation { - order: 5, - operation: , - }, - 
OrderedOperation { - order: 6, - operation: , + operation: , }, ], cursors: [], diff --git a/backend/reconcile/src/operation_transformation/utils.rs b/backend/reconcile/src/operation_transformation/utils.rs new file mode 100644 index 00000000..5169be92 --- /dev/null +++ b/backend/reconcile/src/operation_transformation/utils.rs @@ -0,0 +1,2 @@ +pub mod cook_operations; +pub mod elongate_operations; diff --git a/backend/reconcile/src/operation_transformation/utils/cook_operations.rs b/backend/reconcile/src/operation_transformation/utils/cook_operations.rs new file mode 100644 index 00000000..44056898 --- /dev/null +++ b/backend/reconcile/src/operation_transformation/utils/cook_operations.rs @@ -0,0 +1,55 @@ +use crate::{ + diffs::raw_operation::RawOperation, + operation_transformation::{Operation, ordered_operation::OrderedOperation}, +}; + +/// Turn raw operations into ordered operations while keeping track of old & new +/// indexes. +pub fn cook_operations(raw_operations: I) -> impl Iterator> +where + I: IntoIterator>, + T: PartialEq + Clone + std::fmt::Debug, +{ + let mut new_index = 0; // this is the start index of the operation on the new text + let mut order = 0; // this is the start index of the operation on the original text + + raw_operations.into_iter().filter_map(move |raw_operation| { + let length = raw_operation.original_text_length(); + + match raw_operation { + RawOperation::Equal(..) => { + let op = if cfg!(debug_assertions) { + Operation::create_equal_with_text(new_index, raw_operation.get_original_text()) + } else { + Operation::create_equal(new_index, length) + } + .map(|operation| OrderedOperation { order, operation }); + + new_index += length; + order += length; + + op + } + RawOperation::Insert(tokens) => { + let op = Operation::create_insert(new_index, tokens) + .map(|operation| OrderedOperation { order, operation }); + + new_index += length; + + op + } + RawOperation::Delete(..) 
=> { + let op = if cfg!(debug_assertions) { + Operation::create_delete_with_text(new_index, raw_operation.get_original_text()) + } else { + Operation::create_delete(new_index, length) + } + .map(|operation| OrderedOperation { order, operation }); + + order += length; + + op + } + } + }) +} diff --git a/backend/reconcile/src/operation_transformation/utils/elongate_operations.rs b/backend/reconcile/src/operation_transformation/utils/elongate_operations.rs new file mode 100644 index 00000000..b728bf0c --- /dev/null +++ b/backend/reconcile/src/operation_transformation/utils/elongate_operations.rs @@ -0,0 +1,154 @@ +use core::iter; + +use crate::diffs::raw_operation::RawOperation; + +/// Elongates the operations by merging adjacent insertions and deletions that +/// can be joined. This makes the subsequent merging of operations more +/// intuitive. +pub fn elongate_operations(raw_operations: I) -> Vec> +where + I: IntoIterator>, + T: PartialEq + Clone + std::fmt::Debug, +{ + // This might look bad, but this makes sense. The inserts and deletes can be + // interleaved, such as: IDIDID and we need to turn this into IIIDDD. + // So we need to keep track of both the last insert and delete operations, not + // just the last one. + let mut maybe_previous_insert: Option> = None; + let mut maybe_previous_delete: Option> = None; + + // Equals can't be interleaved with inserts and deletes + let mut maybe_previous_equal: Option> = None; + + let mut result: Vec> = raw_operations + .into_iter() + .flat_map(|next| match next { + RawOperation::Insert(..) => match maybe_previous_insert.take() { + Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { + maybe_previous_insert = Some(prev.extend(next)); + Box::new(iter::empty()) as Box>> + } + prev => { + maybe_previous_insert = Some(next); + Box::new( + maybe_previous_equal + .take() + .into_iter() + .chain(prev.into_iter()), + ) as Box>> + } + }, + RawOperation::Delete(..) 
=> match maybe_previous_delete.take() { + Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { + maybe_previous_delete = Some(prev.extend(next)); + Box::new(iter::empty()) as Box>> + } + prev => { + maybe_previous_delete = Some(next); + Box::new( + maybe_previous_equal + .take() + .into_iter() + .chain(prev.into_iter()), + ) as Box>> + } + }, + RawOperation::Equal(..) => match maybe_previous_equal.take() { + Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => { + maybe_previous_equal = Some(prev.extend(next)); + Box::new(iter::empty()) as Box>> + } + prev => { + maybe_previous_equal = Some(next); + Box::new( + maybe_previous_insert + .take() + .into_iter() + .chain(maybe_previous_delete.take()) + .chain(prev.into_iter()), + ) as Box>> + } + }, + }) + .collect(); + + if let Some(prev) = maybe_previous_insert { + result.push(prev); + } + + if let Some(prev) = maybe_previous_delete { + result.push(prev); + } + + if let Some(prev) = maybe_previous_equal { + result.push(prev); + } + + result +} + +#[cfg(test)] + +mod tests { + + use super::*; + + #[test] + fn test_elongate_operations_empty() { + let operations: Vec> = vec![]; + let result = elongate_operations(operations); + assert_eq!(result, vec![]); + } + + #[test] + fn test_elongate_operations_single_operation() { + let operations = vec![RawOperation::Insert(vec!["test".into()])]; + let result = elongate_operations(operations); + assert_eq!(result.len(), 1); + assert!(matches!(result[0], RawOperation::Insert(_))); + } + + #[test] + fn test_elongate_operations_interleaved() { + let operations = vec![ + RawOperation::Insert(vec!["a".into()]), + RawOperation::Delete(vec!["b".into()]), + RawOperation::Insert(vec!["c".into()]), + RawOperation::Delete(vec!["d".into()]), + ]; + let result = elongate_operations(operations); + assert_eq!(result.len(), 2); + assert!(matches!(result[0], RawOperation::Insert(_))); + assert!(matches!(result[1], RawOperation::Delete(_))); + } + + #[test] + fn 
test_elongate_operations_with_equal() { + let operations = vec![ + RawOperation::Equal(vec!["a".into()]), + RawOperation::Equal(vec!["b".into()]), + RawOperation::Insert(vec!["c".into()]), + RawOperation::Insert(vec!["d".into()]), + ]; + let result = elongate_operations(operations); + assert_eq!(result.len(), 2); + assert!(matches!(result[0], RawOperation::Equal(_))); + assert!(matches!(result[1], RawOperation::Insert(_))); + } + + #[test] + fn test_elongate_operations_mixed_sequence() { + let operations = vec![ + RawOperation::Insert(vec!["a".into()]), + RawOperation::Equal(vec!["b".into()]), + RawOperation::Delete(vec!["c".into()]), + RawOperation::Equal(vec!["d".into()]), + ]; + let result = elongate_operations(operations); + assert_eq!(result.len(), 4); + assert!(matches!(result[0], RawOperation::Insert(_))); + assert!(matches!(result[1], RawOperation::Equal(_))); + assert!(matches!(result[2], RawOperation::Delete(_))); + assert!(matches!(result[3], RawOperation::Equal(_))); + } +} diff --git a/backend/reconcile/src/tokenizer.rs b/backend/reconcile/src/tokenizer.rs index 7ce6463c..608fe936 100644 --- a/backend/reconcile/src/tokenizer.rs +++ b/backend/reconcile/src/tokenizer.rs @@ -3,4 +3,5 @@ use token::Token; pub mod token; pub mod word_tokenizer; +/// A type alias for tokenizer functions that take a string and return a list of tokens. pub type Tokenizer = dyn Fn(&str) -> Vec>; diff --git a/backend/reconcile/src/tokenizer/token.rs b/backend/reconcile/src/tokenizer/token.rs index 86cbb92f..0c12770d 100644 --- a/backend/reconcile/src/tokenizer/token.rs +++ b/backend/reconcile/src/tokenizer/token.rs @@ -2,8 +2,12 @@ use serde::{Deserialize, Serialize}; /// A token is a string that has been normalised in some way. -/// The normalised form is used for comparison, while the original form is used -/// for applying `Operation`-s. 
+/// +/// A token consists of the normalised form, which is used for comparison, and +/// the original form, which is used for subsequently applying `Operation`-s to +/// a text document. +/// +/// It's UTF-8 compatible. #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] pub struct Token @@ -11,18 +15,20 @@ where T: PartialEq + Clone + std::fmt::Debug, { /// The normalised form of the token used deriving the diff. - pub normalised: T, + normalised: T, /// The original string, that should be inserted or deleted in the document. original: String, - /// Whether the token is joinable with the previous token. - is_left_joinable: bool, + /// Whether the token is semantically joinable with the previous token. + pub is_left_joinable: bool, - /// Whether the token is joinable with the next token. - is_right_joinable: bool, + /// Whether the token is semantically joinable with the next token. + pub is_right_joinable: bool, } +/// Trivial implementation of Token when the normalised form is the same as the +/// original string. 
impl From<&str> for Token { fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) } } @@ -47,13 +53,11 @@ where pub fn original(&self) -> &str { &self.original } + pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; } + pub fn normalised(&self) -> &T { &self.normalised } pub fn get_original_length(&self) -> usize { self.original.chars().count() } - - pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable } - - pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable } } impl PartialEq for Token diff --git a/backend/reconcile/src/tokenizer/word_tokenizer.rs b/backend/reconcile/src/tokenizer/word_tokenizer.rs index 2267f69f..61c3fa3e 100644 --- a/backend/reconcile/src/tokenizer/word_tokenizer.rs +++ b/backend/reconcile/src/tokenizer/word_tokenizer.rs @@ -1,7 +1,7 @@ use super::token::Token; -/// Splits on word boundaries creating alternating words and whitespaces with -/// the whitesspaces getting unique IDs. +/// Splits text on word boundaries creating tokens of alternating words and +/// whitespaces with the whitespaces getting unique IDs. 
/// /// ## Example /// @@ -34,7 +34,8 @@ pub fn word_tokenizer(text: &str) -> Vec> { for i in 0..result.len() - 1 { if result[i].original().chars().all(char::is_whitespace) { - result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original(); + let normalised = result[i].normalised().to_owned() + result[i + 1].original(); + result[i].set_normalised(normalised); } } diff --git a/backend/reconcile/src/utils.rs b/backend/reconcile/src/utils.rs index 105719bd..91330ca5 100644 --- a/backend/reconcile/src/utils.rs +++ b/backend/reconcile/src/utils.rs @@ -1,6 +1,5 @@ pub mod common_prefix_len; pub mod common_suffix_len; pub mod find_longest_prefix_contained_within; -pub mod merge_iters; pub mod side; pub mod string_builder; diff --git a/backend/reconcile/src/utils/merge_iters.rs b/backend/reconcile/src/utils/merge_iters.rs deleted file mode 100644 index 2730c336..00000000 --- a/backend/reconcile/src/utils/merge_iters.rs +++ /dev/null @@ -1,86 +0,0 @@ -use core::{cmp::Ordering, iter::Peekable}; - -pub struct MergeAscending -where - L: Iterator, - R: Iterator, - F: Fn(&R::Item) -> O, - O: PartialOrd, -{ - left: Peekable, - right: Peekable, - get_key: F, -} - -impl MergeAscending -where - L: Iterator, - R: Iterator, - F: Fn(&R::Item) -> O, - O: PartialOrd, -{ - fn new(left: L, right: R, get_key: F) -> Self { - MergeAscending { - left: left.peekable(), - right: right.peekable(), - get_key, - } - } -} - -impl Iterator for MergeAscending -where - L: Iterator, - R: Iterator, - F: Fn(&R::Item) -> O, - O: PartialOrd, -{ - type Item = L::Item; - - fn next(&mut self) -> Option { - let order = match (self.left.peek(), self.right.peek()) { - (Some(l), Some(r)) => (self.get_key)(l).partial_cmp(&(self.get_key)(r)), - (Some(_), None) => Some(Ordering::Less), - (None, Some(_)) => Some(Ordering::Greater), - (None, None) => return None, - }; - - match order { - Some(Ordering::Less | Ordering::Equal) | None => self.left.next(), - Some(Ordering::Greater) => 
self.right.next(), - } - } -} - -pub trait MergeSorted: Iterator { - fn merge_sorted_by_key(self, other: R, get_key: F) -> MergeAscending - where - Self: Sized, - R: Iterator, - F: Fn(&Self::Item) -> O, - O: PartialOrd, - { - MergeAscending::new(self, other, get_key) - } -} - -impl MergeSorted for T where T: Iterator {} - -#[cfg(test)] -mod tests { - use pretty_assertions::assert_eq; - - use super::*; - - #[test] - fn test_merge_sorted_by_key() { - let left = [9, 7, 5, 3, 1]; - let right = [7, 6, 5, 4, 3]; - - let result: Vec = left - .into_iter() - .merge_sorted_by_key(right.into_iter(), |x| -1 * x) - .collect(); - assert_eq!(result, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]); - } -}