Add cursor moving (#19)

This commit is contained in:
Andras Schmelczer 2025-04-02 22:06:38 +01:00 committed by GitHub
parent 29d8779786
commit 1f9728d893
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
49 changed files with 1105 additions and 141 deletions

View file

@ -3,5 +3,8 @@ mod operation_transformation;
mod tokenizer;
mod utils;
pub use operation_transformation::{EditedText, reconcile, reconcile_with_tokenizer};
pub use tokenizer::token::Token;
pub use operation_transformation::{
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
reconcile_with_tokenizer,
};
pub use tokenizer::{Tokenizer, token::Token};

View file

@ -1,41 +1,42 @@
mod cursor;
mod edited_text;
mod merge_context;
mod operation;
pub use cursor::{CursorPosition, TextWithCursors};
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::tokenizer::Tokenizer;
use crate::Tokenizer;
#[must_use]
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
// Common trivial cases
if left == right {
return left.to_owned();
}
reconcile_with_cursors(original, left.into(), right.into())
.text
.to_string()
}
if original == left {
return right.to_owned();
}
if original == right {
return left.to_owned();
}
// 3-way merge
#[must_use]
pub fn reconcile_with_cursors<'a>(
original: &'a str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
) -> TextWithCursors<'static> {
let left_operations = EditedText::from_strings(original, left);
let right_operations = EditedText::from_strings(original, right);
let merged_operations = left_operations.merge(right_operations);
merged_operations.apply()
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
}
pub fn reconcile_with_tokenizer<F, T>(
#[must_use]
pub fn reconcile_with_tokenizer<'a, F, T>(
original: &str,
left: &str,
right: &str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> String
) -> TextWithCursors<'static>
where
T: PartialEq + Clone + std::fmt::Debug,
{
@ -43,7 +44,8 @@ where
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
let merged_operations = left_operations.merge(right_operations);
merged_operations.apply()
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
}
#[cfg(test)]
@ -54,6 +56,7 @@ mod test {
use test_case::test_matrix;
use super::*;
use crate::CursorPosition;
#[test]
fn test_merges() {
@ -172,6 +175,188 @@ mod test {
" |7ca2b36d-6ee7-49eb-8eb1-d77e4cc1a001| |cd9195cc-103a-4f13-90c8-4fba0ba421ee| |d39156cc-cfd6-42a8-b70a-75020896069d| |fbad794c-9c47-41f2-a343-490284ecb5a0| |dup| |dup| ");
}
/// When neither side edits the text, both cursors come back at their original
/// positions.
#[test]
fn test_cursor_position_no_updates() {
    let cursor = |id, char_index| CursorPosition { id, char_index };
    let text = "hello world";

    let merged = reconcile_with_cursors(
        text,
        TextWithCursors::new(text, vec![cursor(0, 0)]),
        TextWithCursors::new(text, vec![cursor(1, 5)]),
    );

    let expected = TextWithCursors::new(text, vec![cursor(0, 0), cursor(1, 5)]);
    assert_eq!(merged, expected);
}
/// Both sides append text after "hi"; the expected cursors below are listed in
/// ascending position order.
#[test]
fn test_cursor_position_updates_with_inserts() {
    let cursor = |id, char_index| CursorPosition { id, char_index };

    // Left cursor sits just before the final "e" of "hi there".
    let left = TextWithCursors::new("hi there", vec![cursor(0, 7)]);
    // Right cursors: one at the very end of "hi world!", one between "h" and "i".
    let right = TextWithCursors::new("hi world!", vec![cursor(1, 9), cursor(2, 1)]);

    let merged = reconcile_with_cursors("hi", left, right);

    let expected = TextWithCursors::new(
        "hi there world!",
        // Cursor 1 is expected at the end of the merged text (15 characters).
        vec![cursor(2, 1), cursor(0, 7), cursor(1, 15)],
    );
    assert_eq!(merged, expected);
}
/// Each side deletes text next to its own cursor; the expected cursors land on
/// what is left of the merged text.
#[test]
fn test_cursor_position_updates_with_deleted() {
    let cursor = |id, char_index| CursorPosition { id, char_index };

    // Left drops "c " and keeps its cursor after "a".
    let left = TextWithCursors::new("a b d", vec![cursor(0, 1)]);
    // Right drops "a b " and keeps its cursor after "c".
    let right = TextWithCursors::new("c d", vec![cursor(1, 1)]);

    let merged = reconcile_with_cursors("a b c d", left, right);

    let expected = TextWithCursors::new(" d", vec![cursor(0, 0), cursor(1, 1)]);
    assert_eq!(merged, expected);
}
// Both sides rewrite different parts of the same sentence and each carries two
// cursors; the expected cursors below are listed in ascending position order.
#[test]
fn test_cursor_complex() {
let original = "this is some complex text to test cursor positions";
let left = TextWithCursors::new(
"this is really complex text for testing cursor positions",
vec![
CursorPosition {
id: 0,
char_index: 8,
}, // after "this is "
CursorPosition {
id: 1,
char_index: 22,
}, // after "this is really complex"
],
);
let right = TextWithCursors::new(
"that was some complex sample to test cursor movements",
vec![
CursorPosition {
id: 2,
char_index: 5,
}, // after "that "
CursorPosition {
id: 3,
char_index: 29,
}, // after "that was some complex sample "
],
);
let merged = reconcile_with_cursors(original, left, right);
assert_eq!(
merged,
TextWithCursors::new(
"that was really complex sample for testing cursor movements",
vec![
CursorPosition {
id: 2,
char_index: 5
}, // unchanged
CursorPosition {
id: 0,
char_index: 9
}, // before "really"
CursorPosition {
id: 1,
char_index: 23
}, // still right after "complex"; one further right because "that was" is one char longer than "this is"
CursorPosition {
id: 3,
char_index: 31
}, // before "for"
]
)
);
}
#[test_matrix( [
"pride_and_prejudice.txt",
"romeo_and_juliet.txt",
@ -200,7 +385,7 @@ mod test {
let files = [file_name_1, file_name_2, file_name_3];
let permutations = [range_1, range_2, range_3];
let root = Path::new("test/resources/");
let root = Path::new("tests/resources/");
let contents = files
.iter()

View file

@ -0,0 +1,68 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::operation_transformation::Operation;
/// The position of an identifiable cursor in a text document, based on its
/// (UTF-8) character index.
///
/// Both fields are plain integers, so the type implements `Eq` and `Hash` in
/// addition to `PartialEq` and can be used as a set member or map key.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct CursorPosition {
    /// Identifier that tells this cursor apart from the others.
    pub id: usize,
    /// Character index of the cursor within its text.
    pub char_index: usize,
}
impl CursorPosition {
    /// Returns a copy of this cursor repositioned according to `context`.
    ///
    /// If the context's last operation is a delete, the cursor is placed at
    /// that delete's `index`. Otherwise the cursor's own index is moved by
    /// `context.shift`. A negative result is clamped to `0`.
    #[must_use]
    pub fn apply_merge_context<T>(&self, context: &MergeContext<T>) -> Self
    where
        T: PartialEq + Clone + std::fmt::Debug,
    {
        let moved_index = if let Some(Operation::Delete { index, .. }) = context.last_operation()
        {
            (*index) as i64
        } else {
            self.char_index as i64 + context.shift
        };

        // The arithmetic is signed because the shift may be negative; clamp
        // before converting back to an unsigned index.
        let char_index = moved_index.max(0) as usize;

        Self {
            id: self.id,
            char_index,
        }
    }
}
/// A text together with the cursors positioned in it.
///
/// The text is held as a `Cow`, so a value can either borrow an existing
/// `&str` or own a `String`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
/// The text content, borrowed or owned.
pub text: Cow<'a, str>,
/// The cursors that belong to `text`.
pub cursors: Vec<CursorPosition>,
}
impl<'a> TextWithCursors<'a> {
#[must_use]
pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
Self {
text: text.into(),
cursors,
}
}
#[must_use]
pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
Self {
text: text.into(),
cursors,
}
}
}
impl<'a> From<&'a str> for TextWithCursors<'a> {
    /// Borrows `text` and attaches no cursors to it.
    fn from(text: &'a str) -> Self {
        Self::new(text, Vec::new())
    }
}

View file

@ -3,7 +3,7 @@ use core::iter;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::Operation;
use super::{CursorPosition, Operation, TextWithCursors};
use crate::{
diffs::{myers::diff, raw_operation::RawOperation},
operation_transformation::merge_context::MergeContext,
@ -29,6 +29,7 @@ where
{
text: &'a str,
operations: Vec<OrderedOperation<T>>,
pub(crate) cursors: Vec<CursorPosition>,
}
impl<'a> EditedText<'a, String> {
@ -39,7 +40,7 @@ impl<'a> EditedText<'a, String> {
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: &str) -> Self {
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
}
}
@ -55,17 +56,18 @@ where
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: &str,
updated: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(updated);
let updated_tokens = (tokenizer)(&updated.text);
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
Self::new(
original,
Self::cook_operations(Self::elongate_operations(diff)).collect(),
updated.cursors,
)
}
@ -170,7 +172,11 @@ where
/// Create a new `EditedText` with the given operations.
/// The operations must be in the order in which they are meant to be
/// applied. The operations must not overlap.
fn new(text: &'a str, operations: Vec<OrderedOperation<T>>) -> Self {
fn new(
text: &'a str,
operations: Vec<OrderedOperation<T>>,
mut cursors: Vec<CursorPosition>,
) -> Self {
operations
.iter()
.zip(operations.iter().skip(1))
@ -183,7 +189,13 @@ where
);
});
Self { text, operations }
cursors.sort_by_key(|cursor| cursor.char_index);
Self {
text,
operations,
cursors,
}
}
#[must_use]
@ -196,50 +208,110 @@ where
let mut left_merge_context = MergeContext::default();
let mut right_merge_context = MergeContext::default();
Self::new(
self.text,
self.operations
.into_iter()
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| {
(
operation.order,
// Operations on the left and right must come in the same order so that
// inserts can be merged with other inserts and deletes with deletes.
usize::from(matches!(operation.operation, Operation::Delete { .. })),
// Make sure that the ordering is deterministic regardless which text
// is left or right.
match &operation.operation {
Operation::Insert { text, .. } => text
.iter()
.map(super::super::tokenizer::token::Token::original)
.collect::<String>(),
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
let mut left_cursors = self.cursors.iter().peekable();
let mut right_cursors = other.cursors.iter().peekable();
let merged_operations = self
.operations
.into_iter()
// The current text is always the left; the other operation is the right side.
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| {
(
operation.order,
// Operations on the left and right must come in the same order so that
// inserts can be merged with other inserts and deletes with deletes.
usize::from(matches!(operation.operation, Operation::Delete { .. })),
// Make sure that the ordering is deterministic regardless which text
// is left or right.
match &operation.operation {
Operation::Insert { text, .. } => text
.iter()
.map(super::super::tokenizer::token::Token::original)
.collect::<String>(),
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
)
},
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
match side {
Side::Left => {
while let Some(cursor) = left_cursors
.next_if(|cursor| cursor.char_index <= operation.start_index())
{
right_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
while let Some(cursor) = right_cursors.next_if(|cursor| {
cursor.char_index as i64
<= operation.start_index() as i64 + right_merge_context.shift
- left_merge_context.shift
}) {
left_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
)
},
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
match side {
Side::Left => operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
),
Side::Right => operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
),
}
.map(|operation| OrderedOperation { order, operation })
.into_iter()
})
.collect(),
)
Side::Right => {
while let Some(cursor) = right_cursors
.next_if(|cursor| cursor.char_index <= operation.start_index())
{
left_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
while let Some(cursor) = left_cursors.next_if(|cursor| {
cursor.char_index as i64
<= operation.start_index() as i64 + left_merge_context.shift
- right_merge_context.shift
}) {
right_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
)
}
}
.map(|operation| OrderedOperation { order, operation })
.into_iter()
})
.collect();
for cursor in left_cursors {
right_merge_context
.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
for cursor in right_cursors {
left_merge_context.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
Self::new(self.text, merged_operations, merged_cursors)
}
/// Apply the operations to the text and return the resulting text.
@ -268,7 +340,7 @@ mod tests {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right);
let operations = EditedText::from_strings(left, right.into());
insta::assert_debug_snapshot!(operations);
@ -280,7 +352,7 @@ mod tests {
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text);
let operations = EditedText::from_strings(text, text.into());
assert_eq!(operations.operations.len(), 0);
@ -296,8 +368,8 @@ mod tests {
let right = "Hello world! How are you?";
let expected = "Hello world! How are you? I'm Andras.";
let operations_1 = EditedText::from_strings(original, left);
let operations_2 = EditedText::from_strings(original, right);
let operations_1 = EditedText::from_strings(original, left.into());
let operations_2 = EditedText::from_strings(original, right.into());
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);

View file

@ -2,7 +2,7 @@ use core::fmt::Debug;
use crate::operation_transformation::Operation;
#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
@ -23,26 +23,19 @@ where
}
}
impl<T> Debug for MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("MergeContext")
.field("last_operation", &self.last_operation)
.field("shift", &self.shift)
.finish()
}
}
impl<T> MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
self.last_operation = operation;
}
/// Replace the last delete operation (if there was one) with a new one
/// while applying it to the shift.
/// while applying it to the `shift` in case the last operation
/// was a delete.
pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
if let Some(Operation::Delete {
deleted_character_count,
@ -55,32 +48,22 @@ where
self.last_operation = operation;
}
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
self.last_operation = operation;
}
/// Remove the last operation (if there was one) in case it is behind the
/// threshold operation. This changes the shift in case the last operation
/// threshold operation. This updates the `shift` in case the last operation
/// was a delete.
pub fn consume_last_operation_if_it_is_too_behind(
&mut self,
threshold_operation: &Operation<T>,
) {
pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
if let Some(last_operation) = self.last_operation.as_ref() {
if let Operation::Delete {
deleted_character_count,
..
} = last_operation
{
if threshold_operation.start_index() as i64 + self.shift
> last_operation.end_index() as i64
{
if threshold_index + self.shift > last_operation.end_index() as i64 {
self.shift -= *deleted_character_count as i64;
self.last_operation = None;
}
} else if let Operation::Insert { .. } = last_operation {
if threshold_operation.start_index() as i64 + self.shift
- last_operation.len() as i64
if threshold_index + self.shift - last_operation.len() as i64
> last_operation.end_index() as i64
{
self.last_operation = None;

View file

@ -189,7 +189,7 @@ where
affecting_context: &mut MergeContext<T>,
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
affecting_context.consume_last_operation_if_it_is_too_behind(&self);
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation()) {

View file

@ -23,4 +23,5 @@ EditedText {
operation: <delete ' you? Adam' from index 43>,
},
],
cursors: [],
}

View file

@ -1,4 +1,16 @@
use std::fmt::Display;
/// Identifies which of two inputs something belongs to: the left or the right.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Side {
    Left,
    Right,
}

impl Display for Side {
    /// Writes the bare variant name, `Left` or `Right`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Side::Left => "Left",
            Side::Right => "Right",
        };
        f.write_str(label)
    }
}