Improve compact diff API (#24)
* Remove is_binary from API
* Format
* Rename file
* Test with more feature combinations
* Don't depend on serde for wasm
* Fix lint & tests
* Don't unwrap to MAX number
* Expose undiff to JS
* Add undiff tests
* Lint
* Change name
This commit is contained in:
parent
6191d1adb3
commit
e85eb485e8
20 changed files with 430 additions and 424 deletions
|
|
@ -4,15 +4,17 @@ use std::{fmt::Debug, vec};
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
BuiltinTokenizer, ChangeSet, CursorPosition, TextWithCursors,
|
||||
BuiltinTokenizer, CursorPosition, TextWithCursors,
|
||||
operation_transformation::{
|
||||
Operation,
|
||||
transport::SimpleOperation,
|
||||
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
||||
},
|
||||
raw_operation::RawOperation,
|
||||
tokenizer::Tokenizer,
|
||||
types::{history::History, side::Side, span_with_history::SpanWithHistory},
|
||||
types::{
|
||||
history::History, number_or_string::NumberOrString, side::Side,
|
||||
span_with_history::SpanWithHistory,
|
||||
},
|
||||
utils::string_builder::StringBuilder,
|
||||
};
|
||||
|
||||
|
|
@ -105,6 +107,11 @@ where
|
|||
/// from the same original text. The operations are merged using the
|
||||
/// principles of Operational Transformation. The cursors are updated
|
||||
/// accordingly to reflect the changes made by the merged operations.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow (in i64) when calculating new
|
||||
/// cursor positions.
|
||||
#[must_use]
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge(self, other: Self) -> Self {
|
||||
|
|
@ -166,13 +173,14 @@ where
|
|||
let result = operation.merge_operations(&mut last_other_op);
|
||||
|
||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||
let merged_length_signed =
|
||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
||||
let seen_left_length_signed =
|
||||
isize::try_from(seen_left_length).unwrap_or(isize::MAX);
|
||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
||||
let original_length_signed =
|
||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
||||
let merged_length_signed = isize::try_from(merged_length)
|
||||
.expect("merged_length must fit in isize");
|
||||
let seen_left_length_signed = isize::try_from(seen_left_length)
|
||||
.expect("seen_left_length must fit in isize");
|
||||
let op_len_signed =
|
||||
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||
let original_length_signed = isize::try_from(original_length)
|
||||
.expect("original_length must fit in isize");
|
||||
|
||||
let shift = merged_length_signed - seen_left_length_signed + op_len_signed
|
||||
- original_length_signed;
|
||||
|
|
@ -199,13 +207,14 @@ where
|
|||
let result = operation.merge_operations(&mut last_other_op);
|
||||
|
||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||
let merged_length_signed =
|
||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
||||
let seen_right_length_signed =
|
||||
isize::try_from(seen_right_length).unwrap_or(isize::MAX);
|
||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
||||
let original_length_signed =
|
||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
||||
let merged_length_signed = isize::try_from(merged_length)
|
||||
.expect("merged_length must fit in isize");
|
||||
let seen_right_length_signed = isize::try_from(seen_right_length)
|
||||
.expect("seen_right_length must fit in isize");
|
||||
let op_len_signed =
|
||||
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||
let original_length_signed = isize::try_from(original_length)
|
||||
.expect("original_length must fit in isize");
|
||||
|
||||
let shift = merged_length_signed - seen_right_length_signed + op_len_signed
|
||||
- original_length_signed;
|
||||
|
|
@ -345,34 +354,122 @@ where
|
|||
history
|
||||
}
|
||||
|
||||
/// Serialize the `EditedText` as a `ChangeSet`, which contains only
|
||||
/// the operations and cursor positions, but without the original text.
|
||||
/// This is useful for sending changes over the network if there's
|
||||
/// a clear consensus on the original text.
|
||||
/// Convert the `EditedText` into a terse representation ready for
|
||||
/// serialization. The result omits cursor positions and the original text.
|
||||
/// This is useful for sending text diffs over the network if there's a
|
||||
/// clear consensus on the original text.
|
||||
///
|
||||
/// Inserts are represented as strings, deletes as negative integers,
|
||||
/// and equal spans as positive integers.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow in i64.
|
||||
#[must_use]
|
||||
pub fn to_change_set(&self) -> ChangeSet {
|
||||
ChangeSet::new(
|
||||
SimpleOperation::from_operations(&self.operations),
|
||||
self.cursors.clone(),
|
||||
)
|
||||
pub fn to_diff(&self) -> Vec<NumberOrString> {
|
||||
let mut result: Vec<NumberOrString> = Vec::with_capacity(self.operations.len());
|
||||
let mut previous_equal: Option<usize> = None;
|
||||
|
||||
for operation in &self.operations {
|
||||
match operation {
|
||||
Operation::Equal { length, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
previous_equal = Some(prev_length + *length);
|
||||
} else {
|
||||
previous_equal = Some(*length);
|
||||
}
|
||||
}
|
||||
|
||||
Operation::Insert { text, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(NumberOrString::Number(
|
||||
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||
));
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
let text: String = text
|
||||
.iter()
|
||||
.map(super::super::tokenizer::token::Token::original)
|
||||
.collect();
|
||||
result.push(NumberOrString::Text(text));
|
||||
}
|
||||
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(NumberOrString::Number(
|
||||
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||
));
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
let count = i64::try_from(*deleted_character_count)
|
||||
.expect("deleted_character_count must fit in i64");
|
||||
result.push(NumberOrString::Number(-count));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(NumberOrString::Number(
|
||||
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||
));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Deserialize an `EditedText` from a `ChangeSet` and the original text.
|
||||
/// This is useful for reconstructing the `EditedText` on the receiving
|
||||
/// end after sending only the `ChangeSet` over the network.
|
||||
/// Deserialize an `EditedText` from a change list and the original text.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow in i64.
|
||||
#[must_use]
|
||||
pub fn from_change_set(
|
||||
text: &'a str,
|
||||
change_set: ChangeSet,
|
||||
pub fn from_diff(
|
||||
original_text: &'a str,
|
||||
diff: Vec<NumberOrString>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> EditedText<'a, T> {
|
||||
let operations = SimpleOperation::to_operations(change_set.operations, text, tokenizer);
|
||||
let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
|
||||
let mut order = 0;
|
||||
|
||||
for item in diff {
|
||||
match item {
|
||||
NumberOrString::Number(length) => {
|
||||
if length >= 0 {
|
||||
let length = usize::try_from(length).expect("length must fit in usize");
|
||||
let original_characters: String =
|
||||
original_text.chars().skip(order).take(length).collect();
|
||||
|
||||
let original_tokens = tokenizer(&original_characters);
|
||||
for token in original_tokens {
|
||||
operations
|
||||
.push(Operation::create_equal(order, token.get_original_length()));
|
||||
order += token.get_original_length();
|
||||
}
|
||||
} else {
|
||||
let length =
|
||||
usize::try_from(-length).expect("negative length must fit in usize");
|
||||
operations.push(Operation::create_delete(order, length));
|
||||
order += length;
|
||||
}
|
||||
}
|
||||
NumberOrString::Text(text) => {
|
||||
let tokens = tokenizer(&text);
|
||||
operations.push(Operation::create_insert(order, tokens));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let operation_count = operations.len();
|
||||
EditedText::new(
|
||||
text,
|
||||
original_text,
|
||||
operations,
|
||||
vec![Side::Left; operation_count],
|
||||
change_set.cursors,
|
||||
vec![],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
@ -423,34 +520,29 @@ mod tests {
|
|||
assert_eq!(operations.apply().text(), expected);
|
||||
}
|
||||
|
||||
/// Pins the YAML form of a diff: the kept prefix length, the deleted
/// character count as a negative number, and the inserted text.
/// (Despite its name, this test exercises `serde_yaml::to_string`.)
#[cfg(feature = "serde")]
#[test]
fn test_changes_deserialisation() {
    let before = "Merging text is hard!";
    let after = "Merging text is easy with reconcile!";

    let diff = EditedText::from_strings(before, &after.into()).to_diff();
    let yaml = serde_yaml::to_string(&diff).unwrap();

    assert_eq!(yaml, "- 15\n- -6\n- ' easy with reconcile!'\n");
}
|
||||
|
||||
/// Round trip: a diff produced by `to_diff` and replayed with `from_diff`
/// against the same original text must yield the updated text again.
#[cfg(feature = "serde")]
#[test]
fn test_changes_serialization() {
    let before = "The quick brown fox jumps over the lazy dog.";
    let after = "The quick red fox jumped over the very lazy dog!";

    let diff = EditedText::from_strings(before, &after.into()).to_diff();
    let rebuilt = EditedText::from_diff(before, diff, &*BuiltinTokenizer::Word);

    assert_eq!(rebuilt.apply().text(), after);
}
|
||||
|
|
|
|||
|
|
@ -1,204 +0,0 @@
|
|||
use std::fmt::Debug;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{
|
||||
Deserialize, Serialize,
|
||||
de::{self, Deserializer, Visitor},
|
||||
ser::Serializer,
|
||||
};
|
||||
|
||||
use crate::{CursorPosition, Tokenizer, operation_transformation::Operation};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
pub enum SimpleOperation {
|
||||
Equal { length: usize },
|
||||
Insert { text: String },
|
||||
Delete { length: usize },
|
||||
}
|
||||
|
||||
impl SimpleOperation {
|
||||
pub fn from_operations<T>(operation: &Vec<Operation<T>>) -> Vec<Self>
|
||||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
let mut result: Vec<Self> = Vec::with_capacity(operation.len());
|
||||
let mut previous_equal: Option<usize> = None;
|
||||
|
||||
for operation in operation {
|
||||
match operation {
|
||||
Operation::Equal { length, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
previous_equal = Some(prev_length + *length);
|
||||
} else {
|
||||
previous_equal = Some(*length);
|
||||
}
|
||||
}
|
||||
|
||||
Operation::Insert { text, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(SimpleOperation::Equal {
|
||||
length: prev_length,
|
||||
});
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
let text: String = text
|
||||
.iter()
|
||||
.map(super::super::tokenizer::token::Token::original)
|
||||
.collect();
|
||||
result.push(SimpleOperation::Insert { text });
|
||||
}
|
||||
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(SimpleOperation::Equal {
|
||||
length: prev_length,
|
||||
});
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
result.push(SimpleOperation::Delete {
|
||||
length: *deleted_character_count,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result.push(SimpleOperation::Equal {
|
||||
length: prev_length,
|
||||
});
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// This is similar to `crate::operation_transformation::utils::cook_operations`
|
||||
pub fn to_operations<T>(
|
||||
simple_operations: Vec<Self>,
|
||||
original_text: &str,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Vec<Operation<T>>
|
||||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
let mut operations: Vec<Operation<T>> = Vec::with_capacity(simple_operations.len());
|
||||
let mut order = 0;
|
||||
|
||||
for simple_operation in simple_operations {
|
||||
match simple_operation {
|
||||
SimpleOperation::Equal { length } => {
|
||||
let original_characters: String =
|
||||
original_text.chars().skip(order).take(length).collect();
|
||||
|
||||
let original_tokens = tokenizer(&original_characters);
|
||||
for token in original_tokens {
|
||||
operations
|
||||
.push(Operation::create_equal(order, token.get_original_length()));
|
||||
order += token.get_original_length();
|
||||
}
|
||||
}
|
||||
|
||||
SimpleOperation::Insert { text } => {
|
||||
let tokens = tokenizer(&text);
|
||||
operations.push(Operation::create_insert(order, tokens));
|
||||
}
|
||||
|
||||
SimpleOperation::Delete { length } => {
|
||||
operations.push(Operation::create_delete(order, length));
|
||||
order += length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
operations
|
||||
}
|
||||
}
|
||||
|
||||
/// Compact serialization: an equal span is written as an unsigned integer,
/// an insert as a string, and a delete as a negative integer.
///
/// Fails with a serializer error when a length cannot be represented in the
/// target integer type, rather than silently clamping it.
#[cfg(feature = "serde")]
impl Serialize for SimpleOperation {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // neat idea from https://github.com/spebern/operational-transform-rs/blob/9faa17f0a2b282ac2e09dbb2d29fdaf2ae0bbb4a/operational-transform/src/serde.rs#L14
        match self {
            SimpleOperation::Equal { length } => {
                let length = u64::try_from(*length).map_err(|_| {
                    <S::Error as serde::ser::Error>::custom("equal length does not fit in u64")
                })?;
                serializer.serialize_u64(length)
            }
            SimpleOperation::Insert { text } => serializer.serialize_str(text),
            SimpleOperation::Delete { length } => {
                // Clamping to `i64::MAX` here would emit a different delete
                // than the one held in memory, so report the overflow instead.
                let length = i64::try_from(*length).map_err(|_| {
                    <S::Error as serde::ser::Error>::custom("delete length does not fit in i64")
                })?;
                serializer.serialize_i64(-length)
            }
        }
    }
}
|
||||
|
||||
/// Compact deserialization, mirroring the `Serialize` impl: a non-negative
/// integer is an equal span, a negative integer is a delete of that
/// magnitude, and a string is an insert.
///
/// Integers whose magnitude does not fit in `usize` are rejected with a
/// deserializer error rather than being clamped.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SimpleOperation {
    fn deserialize<D>(deserializer: D) -> Result<SimpleOperation, D::Error>
    where
        D: Deserializer<'de>,
    {
        use std::fmt;

        struct OperationVisitor;

        impl Visitor<'_> for OperationVisitor {
            type Value = SimpleOperation;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("an integer between -2^63 and 2^64-1 or a string")
            }

            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                usize::try_from(value)
                    .map(|length| SimpleOperation::Equal { length })
                    .map_err(|_| E::invalid_value(de::Unexpected::Unsigned(value), &self))
            }

            fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                // `unsigned_abs` is defined for every `i64`, including `i64::MIN`,
                // where a plain negation would overflow.
                let length = usize::try_from(value.unsigned_abs())
                    .map_err(|_| E::invalid_value(de::Unexpected::Signed(value), &self))?;

                // A data format may hand non-negative integers to `visit_i64`
                // too; those are equal spans (same as `visit_u64`), not deletes.
                Ok(if value < 0 {
                    SimpleOperation::Delete { length }
                } else {
                    SimpleOperation::Equal { length }
                })
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(SimpleOperation::Insert {
                    text: value.to_owned(),
                })
            }
        }

        deserializer.deserialize_any(OperationVisitor)
    }
}
|
||||
|
||||
/// A serializable representation of the changes made to a text document
|
||||
/// without the original text.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct ChangeSet {
|
||||
pub operations: Vec<SimpleOperation>,
|
||||
pub cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl ChangeSet {
|
||||
#[must_use]
|
||||
pub fn new(operations: Vec<SimpleOperation>, cursors: Vec<CursorPosition>) -> Self {
|
||||
Self {
|
||||
operations,
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue