Add mergeTextWithHistory function

This commit is contained in:
Andras Schmelczer 2025-06-22 20:49:11 +01:00
parent c0333c1146
commit 779579d38f
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
18 changed files with 285 additions and 100 deletions

View file

@ -1,3 +1,5 @@
#![feature(stmt_expr_attributes)]
mod diffs;
mod operation_transformation;
mod tokenizer;
@ -5,9 +7,10 @@ mod utils;
pub use operation_transformation::{
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
reconcile_with_tokenizer,
reconcile_with_history, reconcile_with_tokenizer,
};
pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer};
pub use utils::{history::History, side::Side};
#[cfg(feature = "wasm")]
pub mod wasm;

View file

@ -7,7 +7,10 @@ pub use cursor::{CursorPosition, TextWithCursors};
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::Tokenizer;
use crate::{
Tokenizer,
utils::{history::History, side::Side},
};
#[must_use]
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
@ -16,14 +19,22 @@ pub fn reconcile(original: &str, left: &str, right: &str) -> String {
.to_string()
}
/// Merges `left` and `right`, two edits of the common ancestor `original`,
/// and returns the outcome as a list of `(History, String)` pairs rather
/// than a single flat string.
///
/// Each side is diffed against `original` with `EditedText::from_strings`
/// and tagged with its `Side`; the two operation lists are then merged and
/// replayed through `apply_with_history`.
#[must_use]
pub fn reconcile_with_history(original: &str, left: &str, right: &str) -> Vec<(History, String)> {
    let from_left = EditedText::from_strings(original, left.into(), Side::Left);
    let from_right = EditedText::from_strings(original, right.into(), Side::Right);
    let merged = from_left.merge(from_right);

    merged.apply_with_history()
}
#[must_use]
pub fn reconcile_with_cursors<'a>(
original: &'a str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
) -> TextWithCursors<'static> {
let left_operations = EditedText::from_strings(original, left);
let right_operations = EditedText::from_strings(original, right);
let left_operations = EditedText::from_strings(original, left, Side::Left);
let right_operations = EditedText::from_strings(original, right, Side::Right);
let merged_operations = left_operations.merge(right_operations);
@ -40,8 +51,10 @@ pub fn reconcile_with_tokenizer<'a, F, T>(
where
T: PartialEq + Clone + std::fmt::Debug,
{
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
let left_operations =
EditedText::from_strings_with_tokenizer(original, left, tokenizer, Side::Left);
let right_operations =
EditedText::from_strings_with_tokenizer(original, right, tokenizer, Side::Right);
let merged_operations = left_operations.merge(right_operations);

View file

@ -8,7 +8,7 @@ use crate::{
cook_operations::cook_operations, elongate_operations::elongate_operations,
},
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
utils::{side::Side, string_builder::StringBuilder},
utils::{history::History, side::Side, string_builder::StringBuilder},
};
/// A text document and a sequence of operations that can be applied to the text
@ -42,8 +42,8 @@ impl<'a> EditedText<'a, String> {
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>, side: Side) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer, side)
}
}
@ -60,6 +60,7 @@ where
original: &'a str,
updated: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
side: Side,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
@ -68,7 +69,7 @@ where
Self::new(
original,
cook_operations(elongate_operations(diff)).collect(),
cook_operations(elongate_operations(diff), side).collect(),
updated.cursors,
)
}
@ -223,6 +224,39 @@ where
builder.build()
}
/// Replays every operation against the original text and reports, per
/// operation, which text it produced or removed together with a `History`
/// tag.
///
/// - `Equal` yields `History::Unchanged` with the text the builder
///   accumulated for that operation.
/// - `Insert` yields `History::AddedFromLeft` / `History::AddedFromRight`
///   (depending on the operation's `side`) with the accumulated text.
/// - `Delete` yields `History::RemovedFromLeft` / `History::RemovedFromRight`
///   with the removed span of the original text.
///
/// The returned vector has exactly one entry per operation, in operation
/// order.
#[must_use]
pub fn apply_with_history(&self) -> Vec<(History, String)> {
    let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
    let mut history = Vec::with_capacity(self.operations.len());

    for operation in &self.operations {
        builder = operation.apply(builder);

        let entry = match operation {
            Operation::Equal { .. } => (History::Unchanged, builder.take()),
            Operation::Insert { side, .. } => {
                let kind = match side {
                    Side::Left => History::AddedFromLeft,
                    Side::Right => History::AddedFromRight,
                };
                (kind, builder.take())
            }
            Operation::Delete {
                deleted_character_count,
                order,
                side,
                ..
            } => {
                // `order` and `deleted_character_count` are measured in
                // characters, so the span must be selected by characters.
                // Slicing `self.text` with them as byte offsets returns the
                // wrong span, or panics on a non-boundary, as soon as the
                // text contains a multi-byte character.
                let deleted: String = self
                    .text
                    .chars()
                    .skip(*order)
                    .take(*deleted_character_count)
                    .collect();
                let kind = match side {
                    Side::Left => History::RemovedFromLeft,
                    Side::Right => History::RemovedFromRight,
                };
                (kind, deleted)
            }
        };

        history.push(entry);
    }

    history
}
}
#[cfg(test)]
@ -237,7 +271,7 @@ mod tests {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right.into());
let operations = EditedText::from_strings(left, right.into(), Side::Right);
insta::assert_debug_snapshot!(operations);
@ -249,7 +283,7 @@ mod tests {
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text.into());
let operations = EditedText::from_strings(text, text.into(), Side::Right);
assert_debug_snapshot!(operations);
@ -264,8 +298,8 @@ mod tests {
let right = "Hello world! How are you?";
let expected = "Hello world! How are you? I'm Andras.";
let operations_1 = EditedText::from_strings(original, left.into());
let operations_2 = EditedText::from_strings(original, right.into());
let operations_1 = EditedText::from_strings(original, left.into(), Side::Left);
let operations_2 = EditedText::from_strings(original, right.into(), Side::Right);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);

View file

@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use crate::{
Token,
utils::{
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
find_longest_prefix_contained_within::find_longest_prefix_contained_within, side::Side,
string_builder::StringBuilder,
},
};
@ -27,11 +27,15 @@ where
},
Insert {
side: Side,
order: usize,
text: Vec<Token<T>>,
},
Delete {
side: Side,
order: usize,
deleted_character_count: usize,
@ -68,14 +72,15 @@ where
}
/// Creates an insert operation with the given index and text.
pub fn create_insert(order: usize, text: Vec<Token<T>>) -> Self {
Operation::Insert { order, text }
pub fn create_insert(order: usize, text: Vec<Token<T>>, side: Side) -> Self {
Operation::Insert { side, order, text }
}
/// Creates a delete operation with the given index and number of
/// to-be-deleted characters.
pub fn create_delete(order: usize, deleted_character_count: usize) -> Self {
pub fn create_delete(order: usize, deleted_character_count: usize, side: Side) -> Self {
Operation::Delete {
side,
order,
deleted_character_count,
@ -84,8 +89,9 @@ where
}
}
pub fn create_delete_with_text(order: usize, text: String) -> Self {
pub fn create_delete_with_text(order: usize, text: String, side: Side) -> Self {
Operation::Delete {
side,
order,
deleted_character_count: text.chars().count(),
@ -200,7 +206,7 @@ where
match (operation, previous_operation) {
(
Operation::Insert { order, text },
Operation::Insert { side, order, text },
Some(Operation::Insert {
text: previous_inserted_text,
..
@ -212,11 +218,12 @@ where
let offset_in_tokens =
find_longest_prefix_contained_within(previous_inserted_text, &text);
Operation::create_insert(order, text[offset_in_tokens..].to_vec())
Operation::create_insert(order, text[offset_in_tokens..].to_vec(), side)
}
(
Operation::Delete {
side,
order,
deleted_character_count,
@ -240,19 +247,20 @@ where
#[cfg(debug_assertions)]
let updated_delete = deleted_text.as_ref().map_or_else(
|| Operation::create_delete(order + overlap, new_length),
|| Operation::create_delete(order + overlap, new_length, side),
|text| {
Operation::create_delete_with_text(
order + overlap,
text.chars()
.skip(deleted_character_count - new_length)
.collect::<String>(),
side,
)
},
);
#[cfg(not(debug_assertions))]
let updated_delete = Operation::create_delete(order + overlap, new_length);
let updated_delete = Operation::create_delete(order + overlap, new_length, side);
updated_delete
}
@ -334,6 +342,7 @@ where
#[cfg(debug_assertions)]
text,
..
} => {
#[cfg(debug_assertions)]
write!(
@ -349,7 +358,7 @@ where
Ok(())
}
Operation::Insert { order, text } => {
Operation::Insert { order, text, .. } => {
write!(
f,
"<insert '{}' at {order}>",
@ -365,6 +374,7 @@ where
#[cfg(debug_assertions)]
deleted_text,
..
} => {
#[cfg(debug_assertions)]
write!(
@ -404,7 +414,8 @@ mod tests {
#[test]
fn test_apply_delete_with_create() {
let builder = StringBuilder::new("hello world");
let delete_operation = Operation::<()>::create_delete_with_text(0, "hello ".to_owned());
let delete_operation =
Operation::<()>::create_delete_with_text(0, "hello ".to_owned(), Side::Left);
let retain_operation = Operation::<()>::create_equal(6, 5);
let mut builder = delete_operation.apply(builder);
@ -418,7 +429,7 @@ mod tests {
let builder = StringBuilder::new("hello");
let retain_operation = Operation::<()>::create_equal(0, 5);
let insert_operation = Operation::create_insert(5, vec![" my friend".into()]);
let insert_operation = Operation::create_insert(5, vec![" my friend".into()], Side::Right);
let mut builder = retain_operation.apply(builder);
builder = insert_operation.apply(builder);

View file

@ -1,8 +1,10 @@
use crate::{diffs::raw_operation::RawOperation, operation_transformation::Operation};
use crate::{
diffs::raw_operation::RawOperation, operation_transformation::Operation, utils::side::Side,
};
/// Turn raw operations into ordered operations while keeping track of the
/// original token's indexes.
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = Operation<T>>
pub fn cook_operations<I, T>(raw_operations: I, side: Side) -> impl Iterator<Item = Operation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
T: PartialEq + Clone + std::fmt::Debug,
@ -27,15 +29,18 @@ where
op
}
RawOperation::Insert(tokens) => Operation::create_insert(original_text_index, tokens),
RawOperation::Insert(tokens) => {
Operation::create_insert(original_text_index, tokens, side)
}
RawOperation::Delete(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_delete_with_text(
original_text_index,
raw_operation.get_original_text(),
side,
)
} else {
Operation::create_delete(original_text_index, length)
Operation::create_delete(original_text_index, length, side)
};
original_text_index += length;

View file

@ -1,5 +1,6 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod history;
pub mod side;
pub mod string_builder;

15
src/utils/history.rs Normal file
View file

@ -0,0 +1,15 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
/// Tag attached to each piece of text returned by `apply_with_history`,
/// saying whether the piece was kept, inserted or deleted and by which side.
///
/// NOTE(review): the string-valued variants look like wasm-bindgen's string
/// enum syntax; plain Rust only accepts integer discriminants, so confirm
/// this still compiles when the `wasm` feature is disabled.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum History {
// Emitted for `Operation::Equal`.
Unchanged = "Unchanged",
// Emitted for `Operation::Insert` whose side is `Side::Left`.
AddedFromLeft = "AddedFromLeft",
// Emitted for `Operation::Insert` whose side is `Side::Right`.
AddedFromRight = "AddedFromRight",
// Emitted for `Operation::Delete` whose side is `Side::Left`.
RemovedFromLeft = "RemovedFromLeft",
// Emitted for `Operation::Delete` whose side is `Side::Right`.
RemovedFromRight = "RemovedFromRight",
}

View file

@ -1,5 +1,9 @@
use std::fmt::Display;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Side {
Left,

View file

@ -35,7 +35,8 @@ impl StringBuilder<'_> {
self.original.nth(length - 1);
if cfg!(debug_assertions) {
#[cfg(debug_assertions)]
{
self.remaining = self.remaining.chars().skip(length).collect();
}
}
@ -44,20 +45,28 @@ impl StringBuilder<'_> {
pub fn retain(&mut self, length: usize) {
self.buffer.extend(self.original.by_ref().take(length));
if cfg!(debug_assertions) {
#[cfg(debug_assertions)]
{
self.remaining = self.remaining.chars().skip(length).collect();
}
}
/// Returns the text accumulated in the buffer so far and leaves the buffer
/// empty, so the next call only yields text appended after this one.
pub fn take(&mut self) -> String {
    // Move the buffer out instead of cloning and then clearing it: the
    // caller receives the same string without an extra allocation and copy.
    std::mem::take(&mut self.buffer)
}
/// Finish building: consumes the builder and returns the accumulated buffer
/// as-is.
///
/// NOTE(review): this method itself does not copy any remaining original
/// text; presumably the tail has already been retained by the time it is
/// called — confirm against callers.
pub fn build(self) -> String { self.buffer }
#[cfg(debug_assertions)]
/// Get a slice of the remaining original string. The slice starts from
/// where the next delete/retain operation would start and is of length
/// `length`. The implementation is quite suboptimal but it's only used
/// for debugging.
#[cfg(debug_assertions)]
pub fn get_slice_from_remaining(&self, length: usize) -> String {
let result = self.remaining.chars().take(length).collect::<String>();

View file

@ -1,2 +1,2 @@
pub mod cursor;
pub mod lib;
pub mod types;

View file

@ -13,7 +13,7 @@ use core::str;
use wasm_bindgen::prelude::*;
use crate::wasm::cursor::JsTextWithCursors;
use crate::wasm::types::{JsTextWithCursors, JsTextWithHistory};
/// Merge two documents with a common parent. Relies on `reconcile::reconcile`
/// for texts and returns the right document as-is if either of the updated
@ -58,6 +58,18 @@ pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
crate::reconcile(parent, left, right)
}
/// WASM wrapper around `crate::reconcile` for merging text.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<JsTextWithHistory> {
set_panic_hook();
crate::reconcile_with_history(parent, left, right)
.into_iter()
.map(Into::into)
.collect()
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]

View file

@ -1,5 +1,7 @@
use wasm_bindgen::prelude::*;
use crate::History;
/// Wrapper type to expose `TextWithCursors` to JS.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
@ -86,3 +88,24 @@ impl From<crate::CursorPosition> for JsCursorPosition {
}
}
}
/// Wrapper type to expose `(History, String)` to JS.
///
/// Both fields are private; they are read through the `history()` and
/// `text()` methods.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct JsTextWithHistory {
// First element of the wrapped pair: the `History` tag.
history: History,
// Second element of the wrapped pair: the text the tag applies to.
text: String,
}
impl From<(History, String)> for JsTextWithHistory {
fn from((history, text): (History, String)) -> Self { JsTextWithHistory { history, text } }
}
#[wasm_bindgen]
impl JsTextWithHistory {
    /// The `History` tag of this piece of text, returned by value.
    #[must_use]
    pub fn history(&self) -> History {
        self.history
    }

    /// An owned copy of this piece of text.
    #[must_use]
    pub fn text(&self) -> String {
        self.text.to_owned()
    }
}