Add string builder/remove ropey

This commit is contained in:
Andras Schmelczer 2024-11-30 10:44:17 +00:00
parent 35be112de2
commit 74626383cc
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
6 changed files with 139 additions and 44 deletions

View file

@ -4,7 +4,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
# optional dependencies
serde = { version = "1.0.215", optional = true }

View file

@ -5,8 +5,8 @@ use crate::tokenizer::word_tokenizer::word_tokenizer;
use crate::tokenizer::Tokenizer;
use crate::utils::ordered_operation::OrderedOperation;
use crate::utils::side::Side;
use crate::utils::string_builder::StringBuilder;
use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted};
use ropey::Rope;
use std::iter;
#[cfg(feature = "serde")]
@ -229,14 +229,13 @@ where
///
/// Returns an SyncLibError::OperationError if the operations cannot be applied to the text.
pub fn apply(&self) -> String {
let mut text = Rope::from_str(self.text);
self.operations
.iter()
.fold(
&mut text,
|rope_text, OrderedOperation { operation, .. }| operation.apply(rope_text),
)
.to_string()
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for OrderedOperation { operation, .. } in &self.operations {
builder = operation.apply(builder);
}
builder.build()
}
}
@ -258,7 +257,6 @@ mod tests {
insta::assert_debug_snapshot!(operations);
let new_right = operations.apply();
assert_eq!(new_right.to_string(), right);
}
@ -283,9 +281,7 @@ mod tests {
let expected = "Hello world! I'm Andras.How are you?";
let operations_1 = EditedText::from_strings(original, left);
println!("{:#?}", operations_1);
let operations_2 = EditedText::from_strings(original, right);
println!("{:#?}", operations_2);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);

View file

@ -33,13 +33,10 @@ where
#[cfg(test)]
mod test {
use std::{fs, ops::Range, path::Path};
use pretty_assertions::assert_eq;
use test_case::test_matrix;
use super::*;
use pretty_assertions::assert_eq;
use std::{fs, ops::Range, path::Path};
use test_case::test_matrix;
#[test]
fn test_merges() {

View file

@ -1,8 +1,9 @@
use crate::utils::find_common_overlap::find_common_overlap;
use crate::utils::string_builder::StringBuilder;
use crate::Token;
use ropey::Rope;
use std::fmt::Debug;
use std::fmt::Display;
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -86,9 +87,9 @@ where
///
/// When compiled in debug mode, panics if a delete operation is attempted on a range
/// of text that does not match the text to be deleted.
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> &'a mut Rope {
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
match self {
Operation::Insert { text, .. } => rope_text.insert(
Operation::Insert { text, .. } => builder.insert(
self.start_index(),
&text
.iter()
@ -100,24 +101,19 @@ where
deleted_text,
..
} => {
debug_assert!(
rope_text.get_slice(self.range()).is_some(),
"Failed to get slice of text to delete"
);
if let Some(text) = deleted_text {
debug_assert_eq!(
rope_text.get_slice(self.range()).unwrap().to_string(),
builder.get_slice(self.range()),
*text,
"Text to delete does not match the text in the rope"
);
}
rope_text.remove(self.range());
builder.delete(self.range());
}
};
rope_text
builder
}
/// Returns the index of the first character that the operation affects.
@ -130,13 +126,16 @@ where
/// Returns the index of the last character that the operation affects.
pub fn end_index(&self) -> usize {
// len() must be greater than 0 because operations must be non-empty
debug_assert!(
self.len() > 0,
" len() must be greater than 0 because operations must be non-empty"
);
self.start_index() + self.len() - 1
}
/// Returns the range of indices of characters that the operation affects, inclusive.
pub fn range(&self) -> std::ops::RangeInclusive<usize> {
self.start_index()..=self.end_index()
/// Returns the range of indices of characters that the operation affects.
pub fn range(&self) -> Range<usize> {
self.start_index()..self.end_index() + 1
}
/// Returns the number of affected characters. It is always greater than 0 because empty operations cannot be created.
@ -152,7 +151,7 @@ where
}
}
/// Clones the operation while updating the index.
/// Creates a new operation with the same type and text but with the given index.
pub fn with_index(self, index: usize) -> Self {
match self {
Operation::Insert { text, .. } => Operation::Insert { index, text },
@ -172,7 +171,7 @@ where
}
}
/// Clones the operation while shifting the index by the given offset.
/// Creates a new operation with the same type and text but with the index shifted by the given offset.
/// The offset can be negative but the resulting index must be non-negative.
///
/// # Panics
@ -357,21 +356,17 @@ mod tests {
#[test]
fn test_apply_delete_with_create() {
let mut rope = Rope::from_str("hello world");
let builder = StringBuilder::new("hello world");
let operation = Operation::<()>::create_delete_with_text(5, " world".to_string()).unwrap();
operation.apply(&mut rope);
assert_eq!(rope.to_string(), "hello");
assert_eq!(operation.apply(builder).build(), "hello");
}
#[test]
fn test_apply_insert() {
let mut rope = Rope::from_str("hello");
let builder = StringBuilder::new("hello");
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
operation.apply(&mut rope);
assert_eq!(rope.to_string(), "hello my friend");
assert_eq!(operation.apply(builder).build(), "hello my friend");
}
}

View file

@ -4,3 +4,4 @@ pub mod find_common_overlap;
pub mod merge_iters;
pub mod ordered_operation;
pub mod side;
pub mod string_builder;

View file

@ -0,0 +1,107 @@
use std::ops::Range;
/// A helper for building a string in order based on an original string and a series of insertions and deletions applied to it.
/// It is safe to use with UTF-8 strings as all operations are based on character indices.
#[derive(Debug, Clone)]
pub struct StringBuilder<'a> {
original: &'a str,
last_old_char_index: usize,
buffer: String,
}
impl StringBuilder<'_> {
pub fn new(original: &str) -> StringBuilder {
StringBuilder {
original,
last_old_char_index: 0,
buffer: String::with_capacity(original.len()),
}
}
/// Insert a string at the given index after copying the original string up to that index from the last insertion or deletion.
pub fn insert(&mut self, from: usize, text: &str) {
self.copy_until(from);
self.buffer.push_str(text);
}
/// Delete a string at the given index after copying the original string up to that index from the last insertion or deletion.
pub fn delete(&mut self, range: std::ops::Range<usize>) {
self.copy_until(range.start);
self.last_old_char_index += range.len();
}
fn copy_until(&mut self, index: usize) {
let current_char_count = self.buffer.chars().count();
debug_assert!(
index >= current_char_count,
"String builder only support building in order"
);
let jump = index - current_char_count;
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.take(jump)
.collect::<String>(),
);
self.last_old_char_index += jump;
}
/// Finish building the string after copying the remaining original string since the last insertion or deletion.
pub fn build(mut self) -> String {
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.collect::<String>(),
);
self.buffer
}
#[cfg(debug_assertions)]
pub fn get_slice(&self, range: Range<usize>) -> String {
let result = self
.buffer
.chars()
.chain(self.original.chars().skip(self.last_old_char_index))
.skip(range.start)
.take(range.end - range.start)
.collect::<String>();
debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",);
result
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_builder() {
let original = "aaa bbb ccc";
let mut builder = StringBuilder::new(original);
builder.insert(0, "ddd ");
builder.delete(4..8);
builder.insert(11, " eee");
assert_eq!(builder.build(), "ddd bbb ccc eee");
}
#[test]
fn test_string_builder2() {
let original = "abcde";
let mut builder = StringBuilder::new(original);
builder.delete(1..4);
assert_eq!(builder.build(), "ae");
}
}