Minimise allocations

This commit is contained in:
Andras Schmelczer 2026-03-11 20:43:34 +00:00
parent 72dc942be6
commit 1a984427ab
2 changed files with 211 additions and 114 deletions

View file

@ -1,10 +1,10 @@
use std::{fmt::Debug, vec};
use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::{
BuiltinTokenizer, CursorPosition, TextWithCursors,
BuiltinTokenizer, CursorPosition, TextWithCursors, Token,
operation_transformation::{
DiffError, Operation,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
@ -55,6 +55,7 @@ where
{
/// Create an `EditedText` from the given original and updated strings
/// using the provided tokenizer
#[must_use]
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: &TextWithCursors,
@ -134,24 +135,21 @@ where
let mut last_right_op = None;
loop {
let (side, operation, mut last_other_op) =
match (maybe_left_op.clone(), maybe_right_op.clone()) {
(Some(left_op), Some(right_op)) => {
if left_op
.get_sort_key(seen_left_length)
.partial_cmp(&right_op.get_sort_key(seen_right_length))
== Some(std::cmp::Ordering::Less)
{
(Side::Left, left_op, last_right_op.clone())
} else {
(Side::Right, right_op, last_left_op.clone())
}
let (side, operation) = match (maybe_left_op.as_ref(), maybe_right_op.as_ref()) {
(Some(left_op), Some(right_op)) => {
if left_op.cmp_priority(seen_left_length, right_op, seen_right_length)
== std::cmp::Ordering::Less
{
(Side::Left, maybe_left_op.take().unwrap())
} else {
(Side::Right, maybe_right_op.take().unwrap())
}
}
(Some(left_op), None) => (Side::Left, left_op, last_right_op.clone()),
(None, Some(right_op)) => (Side::Right, right_op, last_left_op.clone()),
(None, None) => break,
};
(Some(_), None) => (Side::Left, maybe_left_op.take().unwrap()),
(None, Some(_)) => (Side::Right, maybe_right_op.take().unwrap()),
(None, None) => break,
};
let is_advancing_operation = matches!(
operation,
@ -161,7 +159,7 @@ where
let original_length = operation.len();
let (side, result) = match side {
Side::Left => {
let result = operation.merge_operations(&mut last_other_op);
let result = operation.merge_operations(last_right_op.as_ref());
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
let merged_length_signed = isize::try_from(merged_length)
@ -195,7 +193,7 @@ where
(Side::Left, result)
}
Side::Right => {
let result = operation.merge_operations(&mut last_other_op);
let result = operation.merge_operations(last_left_op.as_ref());
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
let merged_length_signed = isize::try_from(merged_length)
@ -304,6 +302,7 @@ where
/// ```
#[must_use]
pub fn apply_with_history(&self) -> Vec<SpanWithHistory> {
let chars: Vec<char> = self.text.chars().collect();
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
let mut history = Vec::with_capacity(self.operations.len());
@ -315,34 +314,26 @@ where
Operation::Equal { .. } => {
history.push(SpanWithHistory::new(builder.take(), History::Unchanged));
}
Operation::Insert { .. } => match side {
Side::Left => {
history.push(SpanWithHistory::new(builder.take(), History::AddedFromLeft));
}
Side::Right => history.push(SpanWithHistory::new(
builder.take(),
History::AddedFromRight,
)),
},
Operation::Insert { .. } => {
let h = match side {
Side::Left => History::AddedFromLeft,
Side::Right => History::AddedFromRight,
};
history.push(SpanWithHistory::new(builder.take(), h));
}
Operation::Delete {
deleted_character_count,
order,
..
} => {
let deleted: String = self
.text
.chars()
.skip(*order)
.take(*deleted_character_count)
let deleted: String = chars[*order..*order + *deleted_character_count]
.iter()
.collect();
match side {
Side::Left => {
history.push(SpanWithHistory::new(deleted, History::RemovedFromLeft));
}
Side::Right => {
history.push(SpanWithHistory::new(deleted, History::RemovedFromRight));
}
}
let h = match side {
Side::Left => History::RemovedFromLeft,
Side::Right => History::RemovedFromRight,
};
history.push(SpanWithHistory::new(deleted, h));
}
}
}
@ -350,6 +341,56 @@ where
history
}
/// Apply the operations and return both the merged text with cursors and
/// the provenance history in a single pass
#[must_use]
pub fn apply_with_all(&self) -> (TextWithCursors, Vec<SpanWithHistory>) {
let chars: Vec<char> = self.text.chars().collect();
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
let mut history = Vec::with_capacity(self.operations.len());
let mut full_text = String::new();
for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
builder = operation.apply(builder);
match operation {
Operation::Equal { .. } => {
let span = builder.take();
full_text.push_str(&span);
history.push(SpanWithHistory::new(span, History::Unchanged));
}
Operation::Insert { .. } => {
let span = builder.take();
full_text.push_str(&span);
let h = match side {
Side::Left => History::AddedFromLeft,
Side::Right => History::AddedFromRight,
};
history.push(SpanWithHistory::new(span, h));
}
Operation::Delete {
deleted_character_count,
order,
..
} => {
let deleted: String = chars[*order..*order + *deleted_character_count]
.iter()
.collect();
let h = match side {
Side::Left => History::RemovedFromLeft,
Side::Right => History::RemovedFromRight,
};
history.push(SpanWithHistory::new(deleted, h));
}
}
}
(
TextWithCursors::new(full_text, self.cursors.clone()),
history,
)
}
/// Convert the `EditedText` into a terse representation ready for
/// serialization. The result omits cursor positions and the original text.
/// This is useful for sending text diffs over the network if there's a
@ -358,11 +399,11 @@ where
/// Inserts are strings, deletes are negative integers (character count),
/// and retained spans are positive integers (character count).
///
/// # Panics
/// # Errors
///
/// Panics if there's an integer overflow in i64.
#[must_use]
pub fn to_diff(&self) -> Vec<NumberOrText> {
/// Returns `DiffError::IntegerOverflow` if a character count exceeds
/// `i64::MAX`.
pub fn to_diff(&self) -> Result<Vec<NumberOrText>, DiffError> {
let mut result: Vec<NumberOrText> = Vec::with_capacity(self.operations.len());
let mut previous_equal: Option<usize> = None;
@ -378,16 +419,14 @@ where
Operation::Insert { text, .. } => {
if let Some(prev_length) = previous_equal {
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
result
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|_| DiffError::IntegerOverflow { value: prev_length },
)?));
previous_equal = None;
}
let text: String = text
.iter()
.map(super::super::tokenizer::token::Token::original)
.collect();
let text: String = text.iter().map(Token::original).collect();
result.push(NumberOrText::Text(text));
}
@ -396,26 +435,31 @@ where
..
} => {
if let Some(prev_length) = previous_equal {
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
result
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|_| DiffError::IntegerOverflow { value: prev_length },
)?));
previous_equal = None;
}
let count = i64::try_from(*deleted_character_count)
.expect("deleted_character_count must fit in i64");
let count = i64::try_from(*deleted_character_count).map_err(|_| {
DiffError::IntegerOverflow {
value: *deleted_character_count,
}
})?;
result.push(NumberOrText::Number(-count));
}
}
}
if let Some(prev_length) = previous_equal {
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
result
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|_| DiffError::IntegerOverflow { value: prev_length },
)?));
}
result
Ok(result)
}
/// Reconstruct an `EditedText` from a diff and the original text.
@ -435,7 +479,8 @@ where
) -> Result<EditedText<'a, T>, DiffError> {
let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
let mut order = 0;
let text_length = original_text.chars().count();
let chars: Vec<char> = original_text.chars().collect();
let text_length = chars.len();
for item in diff {
match item {
@ -453,7 +498,7 @@ where
}
let original_characters: String =
original_text.chars().skip(order).take(length).collect();
chars[order..order + length].iter().collect();
let original_tokens = tokenizer(&original_characters);
for token in original_tokens {
@ -590,7 +635,7 @@ mod tests {
let original = "Merging text is hard!";
let changes = "Merging text is easy with reconcile!";
let result = EditedText::from_strings(original, &changes.into());
let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
let expected = concat!("- 15\n", "- -6\n", "- ' easy with reconcile!'\n",);
assert_eq!(serialized, expected);
@ -622,7 +667,7 @@ mod tests {
let edited_text = EditedText::from_strings(original, &updated.into());
let changes = edited_text.to_diff();
let changes = edited_text.to_diff().unwrap();
let deserialized_edited_text =
EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();

View file

@ -104,28 +104,55 @@ where
}
}
pub fn get_sort_key(&self, insertion_index: usize) -> (usize, usize, usize, String) {
(
self.order(),
match self {
Operation::Delete { .. } => 1,
Operation::Insert { .. } => 2,
Operation::Equal { .. } => 3,
},
insertion_index,
// Make sure that the ordering is deterministic regardless of which text
// is left or right.
match self {
Operation::Equal { length, .. } => length.to_string(),
Operation::Insert { text, .. } => {
text.iter().map(Token::original).collect::<String>()
}
fn type_priority(&self) -> u8 {
match self {
Operation::Delete { .. } => 1,
Operation::Insert { .. } => 2,
Operation::Equal { .. } => 3,
}
}
/// Compare two operations for processing order during merging. Uses
/// (order, type, `insertion_index`) with a deterministic content
/// tiebreaker that avoids allocating.
pub fn cmp_priority(
&self,
self_index: usize,
other: &Self,
other_index: usize,
) -> std::cmp::Ordering {
self.order()
.cmp(&other.order())
.then_with(|| self.type_priority().cmp(&other.type_priority()))
.then_with(|| self_index.cmp(&other_index))
.then_with(|| self.deterministic_content_cmp(other))
}
/// Deterministic tiebreaker based on operation content, so that merge
/// results are identical regardless of which side is left vs right
fn deterministic_content_cmp(&self, other: &Self) -> std::cmp::Ordering {
match (self, other) {
(Operation::Insert { text: t1, .. }, Operation::Insert { text: t2, .. }) => {
let s1 = t1.iter().flat_map(|t| t.original().chars());
let s2 = t2.iter().flat_map(|t| t.original().chars());
s1.cmp(s2)
}
(Operation::Equal { length: l1, .. }, Operation::Equal { length: l2, .. }) => {
l1.cmp(l2)
}
(
Operation::Delete {
deleted_character_count,
deleted_character_count: c1,
..
} => deleted_character_count.to_string(),
},
)
},
Operation::Delete {
deleted_character_count: c2,
..
},
) => c1.cmp(c2),
// Different types are already ordered by type_priority
_ => std::cmp::Ordering::Equal,
}
}
/// Applies the operation to the given `StringBuilder`, returning the
@ -193,10 +220,9 @@ where
}
/// Adjusts this operation based on `previous_operation` from the other side
/// to avoid duplicating or conflicting changes. Updates
/// `previous_operation` in-place.
/// to avoid duplicating or conflicting changes
#[allow(clippy::too_many_lines)]
pub fn merge_operations(self, previous_operation: &mut Option<Self>) -> Operation<T> {
pub fn merge_operations(self, previous_operation: Option<&Self>) -> Operation<T> {
let operation = self;
match (operation, previous_operation) {
@ -295,14 +321,36 @@ where
}
(
ref operation @ Operation::Equal { ref order, .. },
ref operation @ Operation::Equal {
ref order,
#[cfg(debug_assertions)]
ref text,
..
},
Some(Operation::Equal {
order: last_equal_order,
length: last_equal_length,
#[cfg(debug_assertions)]
text: last_equal_text,
..
}),
) => {
if operation.len() == *last_equal_length && *order == *last_equal_order {
// Both sides retained the same span from the original text,
// so we deduplicate by zeroing one out. This is safe because
// both EditedTexts are derived from the same original, and
// matching (order, length) means they cover the same substring
#[cfg(debug_assertions)]
debug_assert_eq!(
text, last_equal_text,
"Equal operations with same order and length should have the same text, \
but got {operation:?} vs {:?}",
Operation::<T>::Equal {
order: *last_equal_order,
length: *last_equal_length,
text: last_equal_text.clone(),
},
);
Operation::create_equal(*order, 0)
} else {
operation.clone()
@ -329,18 +377,20 @@ where
..
} => {
#[cfg(debug_assertions)]
write!(
f,
"<equal {} from {order}>",
text.as_ref()
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{length} characters")),
)?;
{
write!(
f,
"<equal {} from {order}>",
text.as_ref()
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{length} characters")),
)
}
#[cfg(not(debug_assertions))]
write!(f, "<equal {length} from {order}>")?;
Ok(())
{
write!(f, "<equal {length} from {order}>")
}
}
Operation::Insert { order, text, .. } => {
write!(
@ -361,22 +411,24 @@ where
..
} => {
#[cfg(debug_assertions)]
write!(
f,
"<delete {} from {order}>",
deleted_text
.as_ref()
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{deleted_character_count} characters")),
)?;
{
write!(
f,
"<delete {} from {order}>",
deleted_text
.as_ref()
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
.unwrap_or(format!("{deleted_character_count} characters")),
)
}
#[cfg(not(debug_assertions))]
write!(
f,
"<delete {deleted_character_count} characters from {order}>",
)?;
Ok(())
{
write!(
f,
"<delete {deleted_character_count} characters from {order}>",
)
}
}
}
}