From 74626383cc21d1954f7b940b1135951c18f7a9d4 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sat, 30 Nov 2024 10:44:17 +0000 Subject: [PATCH] Add string builder/remove ropey --- backend/reconcile/Cargo.toml | 1 - .../operation_transformation/edited_text.rs | 20 ++-- .../src/operation_transformation/mod.rs | 9 +- .../src/operation_transformation/operation.rs | 45 ++++---- backend/reconcile/src/utils/mod.rs | 1 + backend/reconcile/src/utils/string_builder.rs | 107 ++++++++++++++++++ 6 files changed, 139 insertions(+), 44 deletions(-) create mode 100644 backend/reconcile/src/utils/string_builder.rs diff --git a/backend/reconcile/Cargo.toml b/backend/reconcile/Cargo.toml index 5303f954..97bbe11e 100644 --- a/backend/reconcile/Cargo.toml +++ b/backend/reconcile/Cargo.toml @@ -4,7 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -ropey = { version = "1.6.1", default-features = false, features = ["simd"] } # optional dependencies serde = { version = "1.0.215", optional = true } diff --git a/backend/reconcile/src/operation_transformation/edited_text.rs b/backend/reconcile/src/operation_transformation/edited_text.rs index efd225bd..27c6d06c 100644 --- a/backend/reconcile/src/operation_transformation/edited_text.rs +++ b/backend/reconcile/src/operation_transformation/edited_text.rs @@ -5,8 +5,8 @@ use crate::tokenizer::word_tokenizer::word_tokenizer; use crate::tokenizer::Tokenizer; use crate::utils::ordered_operation::OrderedOperation; use crate::utils::side::Side; +use crate::utils::string_builder::StringBuilder; use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted}; -use ropey::Rope; use std::iter; #[cfg(feature = "serde")] @@ -229,14 +229,13 @@ where /// /// Returns an SyncLibError::OperationError if the operations cannot be applied to the text. pub fn apply(&self) -> String { - let mut text = Rope::from_str(self.text); - self.operations - .iter() - .fold( - &mut text, - |rope_text, OrderedOperation { operation, .. }| operation.apply(rope_text), - ) - .to_string() + let mut builder: StringBuilder<'_> = StringBuilder::new(self.text); + + for OrderedOperation { operation, .. } in &self.operations { + builder = operation.apply(builder); + } + + builder.build() } } @@ -258,7 +257,6 @@ mod tests { insta::assert_debug_snapshot!(operations); let new_right = operations.apply(); - assert_eq!(new_right.to_string(), right); } @@ -283,9 +281,7 @@ mod tests { let expected = "Hello world! I'm Andras.How are you?"; let operations_1 = EditedText::from_strings(original, left); - println!("{:#?}", operations_1); let operations_2 = EditedText::from_strings(original, right); - println!("{:#?}", operations_2); let operations = operations_1.merge(operations_2); assert_eq!(operations.apply(), expected); diff --git a/backend/reconcile/src/operation_transformation/mod.rs b/backend/reconcile/src/operation_transformation/mod.rs index 4805f871..5f4cc317 100644 --- a/backend/reconcile/src/operation_transformation/mod.rs +++ b/backend/reconcile/src/operation_transformation/mod.rs @@ -33,13 +33,10 @@ where #[cfg(test)] mod test { - use std::{fs, ops::Range, path::Path}; - - use pretty_assertions::assert_eq; - - use test_case::test_matrix; - use super::*; + use pretty_assertions::assert_eq; + use std::{fs, ops::Range, path::Path}; + use test_case::test_matrix; #[test] fn test_merges() { diff --git a/backend/reconcile/src/operation_transformation/operation.rs b/backend/reconcile/src/operation_transformation/operation.rs index 8e5e5d88..0fa53dca 100644 --- a/backend/reconcile/src/operation_transformation/operation.rs +++ b/backend/reconcile/src/operation_transformation/operation.rs @@ -1,8 +1,9 @@ use crate::utils::find_common_overlap::find_common_overlap; +use crate::utils::string_builder::StringBuilder; use crate::Token; -use ropey::Rope; use std::fmt::Debug; use std::fmt::Display; +use std::ops::Range; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -86,9 +87,9 @@ where /// /// When compiled in debug mode, panics if a delete operation is attempted on a range /// of text that does not match the text to be deleted. - pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> &'a mut Rope { + pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> { match self { - Operation::Insert { text, .. } => rope_text.insert( + Operation::Insert { text, .. } => builder.insert( self.start_index(), &text .iter() @@ -100,24 +101,19 @@ where deleted_text, .. } => { - debug_assert!( - rope_text.get_slice(self.range()).is_some(), - "Failed to get slice of text to delete" - ); - if let Some(text) = deleted_text { debug_assert_eq!( - rope_text.get_slice(self.range()).unwrap().to_string(), + builder.get_slice(self.range()), *text, "Text to delete does not match the text in the rope" ); } - rope_text.remove(self.range()); + builder.delete(self.range()); } }; - rope_text + builder } /// Returns the index of the first character that the operation affects. @@ -130,13 +126,16 @@ where /// Returns the index of the last character that the operation affects. pub fn end_index(&self) -> usize { - // len() must be greater than 0 because operations must be non-empty + debug_assert!( + self.len() > 0, + " len() must be greater than 0 because operations must be non-empty" + ); self.start_index() + self.len() - 1 } - /// Returns the range of indices of characters that the operation affects, inclusive. - pub fn range(&self) -> std::ops::RangeInclusive { - self.start_index()..=self.end_index() + /// Returns the range of indices of characters that the operation affects. + pub fn range(&self) -> Range { + self.start_index()..self.end_index() + 1 } /// Returns the number of affected characters. It is always greater than 0 because empty operations cannot be created. @@ -152,7 +151,7 @@ where } } - /// Clones the operation while updating the index. + /// Creates a new operation with the same type and text but with the given index. pub fn with_index(self, index: usize) -> Self { match self { Operation::Insert { text, .. } => Operation::Insert { index, text }, @@ -172,7 +171,7 @@ where } } - /// Clones the operation while shifting the index by the given offset. + /// Creates a new operation with the same type and text but with the index shifted by the given offset. /// The offset can be negative but the resulting index must be non-negative. /// /// # Panics @@ -357,21 +356,17 @@ mod tests { #[test] fn test_apply_delete_with_create() { - let mut rope = Rope::from_str("hello world"); + let builder = StringBuilder::new("hello world"); let operation = Operation::<()>::create_delete_with_text(5, " world".to_string()).unwrap(); - operation.apply(&mut rope); - - assert_eq!(rope.to_string(), "hello"); + assert_eq!(operation.apply(builder).build(), "hello"); } #[test] fn test_apply_insert() { - let mut rope = Rope::from_str("hello"); + let builder = StringBuilder::new("hello"); let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap(); - operation.apply(&mut rope); - - assert_eq!(rope.to_string(), "hello my friend"); + assert_eq!(operation.apply(builder).build(), "hello my friend"); } } diff --git a/backend/reconcile/src/utils/mod.rs b/backend/reconcile/src/utils/mod.rs index ab4691bd..c0c3c33d 100644 --- a/backend/reconcile/src/utils/mod.rs +++ b/backend/reconcile/src/utils/mod.rs @@ -4,3 +4,4 @@ pub mod find_common_overlap; pub mod merge_iters; pub mod ordered_operation; pub mod side; +pub mod string_builder; diff --git a/backend/reconcile/src/utils/string_builder.rs b/backend/reconcile/src/utils/string_builder.rs new file mode 100644 index 00000000..b0d00d71 --- /dev/null +++ b/backend/reconcile/src/utils/string_builder.rs @@ -0,0 +1,107 @@ +use std::ops::Range; + +/// A helper for building a string in order based on an original string and a series of insertions and deletions applied to it. +/// It is safe to use with UTF-8 strings as all operations are based on character indices. +#[derive(Debug, Clone)] +pub struct StringBuilder<'a> { + original: &'a str, + last_old_char_index: usize, + buffer: String, +} + +impl StringBuilder<'_> { + pub fn new(original: &str) -> StringBuilder { + StringBuilder { + original, + last_old_char_index: 0, + buffer: String::with_capacity(original.len()), + } + } + + /// Insert a string at the given index after copying the original string up to that index from the last insertion or deletion. + pub fn insert(&mut self, from: usize, text: &str) { + self.copy_until(from); + self.buffer.push_str(text); + } + + /// Delete a string at the given index after copying the original string up to that index from the last insertion or deletion. + pub fn delete(&mut self, range: std::ops::Range) { + self.copy_until(range.start); + self.last_old_char_index += range.len(); + } + + fn copy_until(&mut self, index: usize) { + let current_char_count = self.buffer.chars().count(); + debug_assert!( + index >= current_char_count, + "String builder only support building in order" + ); + + let jump = index - current_char_count; + + self.buffer.push_str( + &self + .original + .chars() + .skip(self.last_old_char_index) + .take(jump) + .collect::(), + ); + self.last_old_char_index += jump; + } + + /// Finish building the string after copying the remaining original string since the last insertion or deletion. + pub fn build(mut self) -> String { + self.buffer.push_str( + &self + .original + .chars() + .skip(self.last_old_char_index) + .collect::(), + ); + + self.buffer + } + + #[cfg(debug_assertions)] + pub fn get_slice(&self, range: Range) -> String { + let result = self + .buffer + .chars() + .chain(self.original.chars().skip(self.last_old_char_index)) + .skip(range.start) + .take(range.end - range.start) + .collect::(); + + debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",); + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string_builder() { + let original = "aaa bbb ccc"; + let mut builder = StringBuilder::new(original); + + builder.insert(0, "ddd "); + builder.delete(4..8); + builder.insert(11, " eee"); + + assert_eq!(builder.build(), "ddd bbb ccc eee"); + } + + #[test] + fn test_string_builder2() { + let original = "abcde"; + let mut builder = StringBuilder::new(original); + + builder.delete(1..4); + + assert_eq!(builder.build(), "ae"); + } +}