Store tokens instead of text in insert
This commit is contained in:
parent
3391d2c0ec
commit
44f9a44f63
10 changed files with 144 additions and 74 deletions
|
|
@ -18,7 +18,6 @@
|
|||
//! without making reasonable progress.
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, IndexMut, Range};
|
||||
use std::vec;
|
||||
|
||||
|
|
@ -36,7 +35,7 @@ use super::raw_operation::RawOperation;
|
|||
/// execution time permitted before it bails and falls back to an approximation.
|
||||
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
let max_d = max_d(old.len(), new.len());
|
||||
let mut vb = V::new(max_d);
|
||||
|
|
@ -131,7 +130,7 @@ fn find_middle_snake<T>(
|
|||
vb: &mut V,
|
||||
) -> Option<(usize, usize)>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
|
@ -238,7 +237,7 @@ fn conquer<T>(
|
|||
vb: &mut V,
|
||||
result: &mut Vec<RawOperation<T>>,
|
||||
) where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
use crate::tokenizer::token::Token;
|
||||
use std::hash::Hash;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
Insert(Vec<Token<T>>),
|
||||
Delete(Vec<Token<T>>),
|
||||
|
|
@ -13,7 +12,7 @@ where
|
|||
|
||||
impl<T> RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
pub fn tokens(&self) -> &Vec<Token<T>> {
|
||||
match self {
|
||||
|
|
|
|||
|
|
@ -2,13 +2,12 @@ use super::Operation;
|
|||
use crate::diffs::raw_operation::RawOperation;
|
||||
use crate::errors::SyncLibError;
|
||||
use crate::operation_transformation::merge_context::MergeContext;
|
||||
use crate::tokenizer::token::Token;
|
||||
use crate::tokenizer::word_tokenizer::word_tokenizer;
|
||||
use crate::tokenizer::Tokenizer;
|
||||
use crate::utils::ordered_operation::OrderedOperation;
|
||||
use crate::utils::side::Side;
|
||||
use crate::{diffs::myers::diff, utils::merge_iters::MergeSorted};
|
||||
use ropey::Rope;
|
||||
use std::hash::Hash;
|
||||
use std::iter;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
|
|
@ -22,13 +21,16 @@ use serde::{Deserialize, Serialize};
|
|||
/// EditedText derived from the same original text and then applied to the original text
|
||||
/// to get the reconciled text of concurrent edits.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
|
||||
pub struct EditedText<'a> {
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
text: &'a str,
|
||||
operations: Vec<OrderedOperation>,
|
||||
operations: Vec<OrderedOperation<T>>,
|
||||
}
|
||||
|
||||
impl<'a> EditedText<'a> {
|
||||
impl<'a> EditedText<'a, String> {
|
||||
/// Create an EditedText from the given original (old) and updated (new) strings.
|
||||
/// The returned EditedText represents the changes from the original to the updated text.
|
||||
/// When the return value is applied to the original text, it will result in the updated text.
|
||||
|
|
@ -36,20 +38,21 @@ impl<'a> EditedText<'a> {
|
|||
pub fn from_strings(original: &'a str, updated: &str) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
/// Create an EditedText from the given original (old) and updated (new) strings.
|
||||
/// The returned EditedText represents the changes from the original to the updated text.
|
||||
/// When the return value is applied to the original text, it will result in the updated text.
|
||||
/// The tokenizer function is used to tokenize the text.
|
||||
pub fn from_strings_with_tokenizer<F, T>(
|
||||
pub fn from_strings_with_tokenizer(
|
||||
original: &'a str,
|
||||
updated: &str,
|
||||
tokenizer: &F,
|
||||
) -> Self
|
||||
where
|
||||
F: Fn(&str) -> Vec<Token<T>>,
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Self {
|
||||
let original_tokens = (tokenizer)(original);
|
||||
let updated_tokens = (tokenizer)(updated);
|
||||
|
||||
|
|
@ -63,10 +66,9 @@ impl<'a> EditedText<'a> {
|
|||
}
|
||||
|
||||
// Turn raw operations into ordered operations while keeping track of old & new indexes.
|
||||
fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation>
|
||||
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
let mut new_index = 0; // this is the start index of the operation on the new text
|
||||
let mut order = 0; // this is the start index of the operation on the original text
|
||||
|
|
@ -81,8 +83,8 @@ impl<'a> EditedText<'a> {
|
|||
|
||||
None
|
||||
}
|
||||
RawOperation::Insert(..) => {
|
||||
let op = Operation::create_insert(new_index, raw_operation.get_original_text())
|
||||
RawOperation::Insert(tokens) => {
|
||||
let op = Operation::create_insert(new_index, tokens)
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
|
|
@ -110,10 +112,9 @@ impl<'a> EditedText<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
|
@ -164,7 +165,7 @@ impl<'a> EditedText<'a> {
|
|||
/// Create a new EditedText with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be applied.
|
||||
/// The operations must not overlap.
|
||||
fn new(text: &'a str, operations: Vec<OrderedOperation>) -> Self {
|
||||
fn new(text: &'a str, operations: Vec<OrderedOperation<T>>) -> Self {
|
||||
operations
|
||||
.iter()
|
||||
.zip(operations.iter().skip(1))
|
||||
|
|
@ -271,7 +272,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_calculate_operations_with_insert() {
|
||||
let original = "hello world! ...";
|
||||
let left = "hello world! I'm Andras.";
|
||||
let left = "Hello world! I'm Andras.";
|
||||
let right = "Hello world! How are you?";
|
||||
let expected = "Hello world! I'm Andras.How are you?";
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,33 @@
|
|||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MergeContext {
|
||||
pub last_delete: Option<Operation>,
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
pub last_delete: Option<Operation<T>>,
|
||||
pub shift: i64,
|
||||
}
|
||||
|
||||
impl MergeContext {
|
||||
impl<T> Default for MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
fn default() -> Self {
|
||||
MergeContext {
|
||||
last_delete: None,
|
||||
shift: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
/// Replace the last delete operation (if there was one) with a new one while
|
||||
/// applying it to the shift.
|
||||
pub fn replace_delete(&mut self, delete: Option<Operation>) {
|
||||
pub fn replace_delete(&mut self, delete: Option<Operation<T>>) {
|
||||
if let Some(produced_last_delete) = self.last_delete.take() {
|
||||
self.shift -= produced_last_delete.len() as i64;
|
||||
}
|
||||
|
|
@ -19,7 +37,7 @@ impl MergeContext {
|
|||
|
||||
/// Remove the last delete operation (if there was one) in case it is behind the
|
||||
/// threshold operation.
|
||||
pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation) {
|
||||
pub fn consume_delete_if_behind_operation(&mut self, threshold_operation: &Operation<T>) {
|
||||
match self.last_delete.as_ref() {
|
||||
Some(last_delete)
|
||||
if threshold_operation.start_index() as i64 + self.shift
|
||||
|
|
|
|||
|
|
@ -4,9 +4,8 @@ mod operation;
|
|||
|
||||
pub use edited_text::EditedText;
|
||||
pub use operation::Operation;
|
||||
use std::hash::Hash;
|
||||
|
||||
use crate::{errors::SyncLibError, tokenizer::token::Token};
|
||||
use crate::{errors::SyncLibError, tokenizer::Tokenizer};
|
||||
|
||||
pub fn reconcile(original: &str, left: &str, right: &str) -> Result<String, SyncLibError> {
|
||||
let left_operations = EditedText::from_strings(original, left);
|
||||
|
|
@ -20,11 +19,10 @@ pub fn reconcile_with_tokenizer<F, T>(
|
|||
original: &str,
|
||||
left: &str,
|
||||
right: &str,
|
||||
tokenizer: &F,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Result<String, SyncLibError>
|
||||
where
|
||||
F: Fn(&str) -> Vec<Token<T>>,
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
|
||||
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use ropey::Rope;
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::errors::SyncLibError;
|
||||
use crate::{errors::SyncLibError, Token};
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -12,11 +12,14 @@ use super::merge_context::MergeContext;
|
|||
/// Operation is tied to a ropey::Rope and is mainly expected to be
|
||||
/// created by EditedText.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Operation {
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
Insert {
|
||||
index: usize,
|
||||
text: String,
|
||||
text: Vec<Token<T>>,
|
||||
},
|
||||
|
||||
Delete {
|
||||
|
|
@ -28,10 +31,13 @@ pub enum Operation {
|
|||
},
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
impl<T> Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
/// Creates an insert operation with the given index and text.
|
||||
/// If the text is empty (meaning that the operation would be a no-op), returns None.
|
||||
pub fn create_insert(index: usize, text: String) -> Option<Self> {
|
||||
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
|
@ -82,7 +88,13 @@ impl Operation {
|
|||
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
|
||||
match self {
|
||||
Operation::Insert { text, .. } => rope_text
|
||||
.try_insert(self.start_index(), text)
|
||||
.try_insert(
|
||||
self.start_index(),
|
||||
&text
|
||||
.iter()
|
||||
.map(|token| token.original())
|
||||
.collect::<String>(),
|
||||
)
|
||||
.map_err(|err| {
|
||||
SyncLibError::OperationApplicationError(format!(
|
||||
"Failed to insert text: {}",
|
||||
|
|
@ -141,7 +153,9 @@ impl Operation {
|
|||
/// Returns the number of affected characters. It is always greater than 0 because empty operations cannot be created.
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Operation::Insert { text, .. } => text.chars().count(),
|
||||
Operation::Insert { text, .. } => {
|
||||
text.iter().map(|token| token.get_original_length()).sum()
|
||||
}
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
|
|
@ -187,9 +201,9 @@ impl Operation {
|
|||
/// The contexts are updated in-place.
|
||||
pub fn merge_operations_with_context(
|
||||
self,
|
||||
affecting_context: &mut MergeContext,
|
||||
produced_context: &mut MergeContext,
|
||||
) -> Option<Operation> {
|
||||
affecting_context: &mut MergeContext<T>,
|
||||
produced_context: &mut MergeContext<T>,
|
||||
) -> Option<Operation<T>> {
|
||||
affecting_context.consume_delete_if_behind_operation(&self);
|
||||
|
||||
let operation = self.with_shifted_index(affecting_context.shift);
|
||||
|
|
@ -253,11 +267,21 @@ impl Operation {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for Operation {
|
||||
impl<T> Display for Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Operation::Insert { index, text } => {
|
||||
write!(f, "<insert '{}' from index {}>", text, index)
|
||||
write!(
|
||||
f,
|
||||
"<insert '{}' from index {}>",
|
||||
text.iter()
|
||||
.map(|token| token.original())
|
||||
.collect::<String>(),
|
||||
index
|
||||
)
|
||||
}
|
||||
Operation::Delete {
|
||||
index,
|
||||
|
|
@ -293,7 +317,7 @@ mod tests {
|
|||
#[test]
|
||||
#[should_panic]
|
||||
fn test_shifting_error() {
|
||||
insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string())
|
||||
insta::assert_debug_snapshot!(Operation::create_insert(1, vec!["hi".into()])
|
||||
.unwrap()
|
||||
.with_shifted_index(-2));
|
||||
}
|
||||
|
|
@ -301,7 +325,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_delete_with_create() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello world");
|
||||
let operation = Operation::create_delete_with_text(5, " world".to_string()).unwrap();
|
||||
let operation = Operation::<()>::create_delete_with_text(5, " world".to_string()).unwrap();
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
|
|
@ -313,7 +337,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_insert() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello");
|
||||
let operation = Operation::create_insert(5, " my friend".to_string()).unwrap();
|
||||
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,20 @@ EditedText {
|
|||
order: 0,
|
||||
operation: Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
text: [
|
||||
Token {
|
||||
normalised: "Hello, ",
|
||||
original: "Hello, ",
|
||||
},
|
||||
Token {
|
||||
normalised: "my ",
|
||||
original: "my ",
|
||||
},
|
||||
Token {
|
||||
normalised: "friend! ",
|
||||
original: "friend! ",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
|
|
@ -27,7 +40,20 @@ EditedText {
|
|||
order: 21,
|
||||
operation: Insert {
|
||||
index: 26,
|
||||
text: "you doing? Albert",
|
||||
text: [
|
||||
Token {
|
||||
normalised: "you ",
|
||||
original: "you ",
|
||||
},
|
||||
Token {
|
||||
normalised: "doing? ",
|
||||
original: "doing? ",
|
||||
},
|
||||
Token {
|
||||
normalised: "Albert",
|
||||
original: "Albert",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
|
|
|
|||
|
|
@ -1,2 +1,6 @@
|
|||
use token::Token;
|
||||
|
||||
pub mod token;
|
||||
pub mod word_tokenizer;
|
||||
|
||||
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
|
|
|||
|
|
@ -1,17 +1,24 @@
|
|||
use std::hash::Hash;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
normalised: T,
|
||||
original: String,
|
||||
}
|
||||
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(s: &str) -> Self {
|
||||
Token {
|
||||
normalised: s.to_string(),
|
||||
original: s.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
pub fn new(normalised: T, original: String) -> Self {
|
||||
Token {
|
||||
|
|
@ -35,18 +42,9 @@ where
|
|||
|
||||
impl<T> PartialEq for Token<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.normalised == other.normalised
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Hash for Token<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.normalised.hash(state);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,11 @@ use serde::{Deserialize, Serialize};
|
|||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct OrderedOperation {
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
{
|
||||
pub order: usize,
|
||||
pub operation: Operation,
|
||||
pub operation: Operation<T>,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue