Add left/right joinability for tokens
This commit is contained in:
parent
b0c6c082a1
commit
b230d34b88
13 changed files with 313 additions and 75 deletions
|
|
@ -28,10 +28,26 @@ where
|
|||
|
||||
/// Consumes the operation and concatenates the original text of each of its
/// tokens, in order, into a single `String`.
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
|
||||
|
||||
/// Extends the operation with another operation if returning the new
|
||||
/// operation. Only operations of the same type can be used to extend.
|
||||
/// If the operations are of different types, returns None.
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
first_token.map_or(true, |t| t.get_is_left_joinable())
|
||||
}
|
||||
|
||||
pub fn is_right_joinable(&self) -> bool {
|
||||
let last_token = self.tokens().last();
|
||||
last_token.map_or(true, |t| t.get_is_right_joinable())
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation, returning the combined
|
||||
/// operation in `Some`. Only operations of the same type as self can be used
|
||||
/// to extend self. Panics if the operations are of different types.
|
||||
pub fn extend(self, other: RawOperation<T>) -> Option<RawOperation<T>> {
|
||||
debug_assert!(
|
||||
std::mem::discriminant(&self) == std::mem::discriminant(&other),
|
||||
"Cannot extend operations of different types. This should have been handled before \
|
||||
calling this function."
|
||||
);
|
||||
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => Some(
|
||||
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect()),
|
||||
|
|
@ -42,7 +58,7 @@ where
|
|||
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => Some(
|
||||
RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect()),
|
||||
),
|
||||
_ => None,
|
||||
_ => unreachable!("Only operations of the same type can be extended"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -17,6 +19,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -25,6 +29,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -33,6 +39,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -41,6 +49,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "y",
|
||||
original: "y",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -49,6 +59,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -9,10 +9,14 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -9,14 +9,20 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -9,10 +9,14 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -17,10 +19,14 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -29,6 +35,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
@ -37,6 +45,8 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -3,15 +3,12 @@ use core::iter;
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{CursorPosition, Operation, TextWithCursors};
|
||||
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
|
||||
use crate::{
|
||||
diffs::{myers::diff, raw_operation::RawOperation},
|
||||
operation_transformation::merge_context::MergeContext,
|
||||
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
|
||||
utils::{
|
||||
merge_iters::MergeSorted as _, ordered_operation::OrderedOperation, side::Side,
|
||||
string_builder::StringBuilder,
|
||||
},
|
||||
utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
|
||||
};
|
||||
|
||||
/// A sequence of operations that can be applied to a text document.
|
||||
|
|
@ -66,11 +63,93 @@ where
|
|||
|
||||
Self::new(
|
||||
original,
|
||||
Self::cook_operations(Self::elongate_operations(diff)).collect(),
|
||||
Self::cook_operations(Self::elongate_operations(Self::break_up_raw_operations(
|
||||
diff,
|
||||
)))
|
||||
.collect(),
|
||||
updated.cursors,
|
||||
)
|
||||
}
|
||||
|
||||
fn break_up_raw_operations<I>(raw_operations: I) -> impl Iterator<Item = RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
raw_operations.into_iter().flat_map(|raw_operation| {
|
||||
let mut result: Vec<RawOperation<T>> = Vec::new();
|
||||
match raw_operation {
|
||||
RawOperation::Insert(tokens) => {
|
||||
for token in tokens {
|
||||
result.push(RawOperation::Insert(vec![token]));
|
||||
}
|
||||
}
|
||||
RawOperation::Delete(tokens) => {
|
||||
for token in tokens {
|
||||
result.push(RawOperation::Delete(vec![token]));
|
||||
}
|
||||
}
|
||||
RawOperation::Equal(tokens) => {
|
||||
for token in tokens {
|
||||
result.push(RawOperation::Equal(vec![token]));
|
||||
}
|
||||
}
|
||||
}
|
||||
result.into_iter()
|
||||
})
|
||||
}
|
||||
|
||||
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => match maybe_previous_insert.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_insert = prev.extend(next);
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_insert = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Delete(..) => match maybe_previous_delete.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_delete = prev.extend(next);
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_delete = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Equal(..) => Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(iter::once(next)),
|
||||
)
|
||||
as Box<dyn Iterator<Item = RawOperation<T>>>,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// Turn raw operations into ordered operations while keeping track of old & new
|
||||
// indexes.
|
||||
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
|
|
@ -119,56 +198,6 @@ where
|
|||
})
|
||||
}
|
||||
|
||||
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => {
|
||||
if let Some(prev) = maybe_previous_insert.take() {
|
||||
maybe_previous_insert = prev.extend(next);
|
||||
} else {
|
||||
maybe_previous_insert = Some(next);
|
||||
}
|
||||
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
if let Some(prev) = maybe_previous_delete.take() {
|
||||
maybe_previous_delete = prev.extend(next);
|
||||
} else {
|
||||
maybe_previous_delete = Some(next);
|
||||
}
|
||||
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
RawOperation::Equal(..) => Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(iter::once(next)),
|
||||
)
|
||||
as Box<dyn Iterator<Item = RawOperation<T>>>,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Create a new `EditedText` with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be
|
||||
/// applied. The operations must not overlap.
|
||||
|
|
@ -225,6 +254,7 @@ where
|
|||
// Operations on the left and right must come in the same order so that
|
||||
// inserts can be merged with other inserts and deletes with deletes.
|
||||
usize::from(matches!(operation.operation, Operation::Delete { .. })),
|
||||
operation.operation.start_index(),
|
||||
// Make sure that the ordering is deterministic regardless which text
|
||||
// is left or right.
|
||||
match &operation.operation {
|
||||
|
|
|
|||
|
|
@ -5,11 +5,21 @@ snapshot_kind: text
|
|||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "what?",
|
||||
original: " what?",
|
||||
normalised: " what?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "",
|
||||
normalised: "what?",
|
||||
original: "what?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -4,20 +4,52 @@ expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
|||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " hello,",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: " hello,",
|
||||
original: "hello,",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " \nwhere",
|
||||
original: " \n",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: " \nwhere",
|
||||
original: "where",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " are",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: " are",
|
||||
original: "are",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " you?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: " you?",
|
||||
original: "you?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
},
|
||||
Token {
|
||||
normalised: " \n",
|
||||
original: " \n",
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
},
|
||||
]
|
||||
|
|
@ -7,9 +7,19 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "Hi",
|
||||
original: "Hi",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " there!",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "there!",
|
||||
original: " there!",
|
||||
original: "there!",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -3,29 +3,45 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
/// A token is a string that has been normalised in some way.
|
||||
/// The normalised form is used for comparison, while the original form is used
|
||||
/// for applying Operations.
|
||||
/// for applying `Operation`-s.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
normalised: T,
|
||||
/// The normalised form of the token, used for deriving the diff.
|
||||
pub normalised: T,
|
||||
|
||||
/// The original string, that should be inserted or deleted in the document.
|
||||
original: String,
|
||||
|
||||
/// Whether the token is joinable with the previous token.
|
||||
is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is joinable with the next token.
|
||||
is_right_joinable: bool,
|
||||
}
|
||||
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) }
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn new(normalised: T, original: String) -> Self {
|
||||
pub fn new(
|
||||
normalised: T,
|
||||
original: String,
|
||||
is_left_joinable: bool,
|
||||
is_right_joinable: bool,
|
||||
) -> Self {
|
||||
Token {
|
||||
normalised,
|
||||
original,
|
||||
is_left_joinable,
|
||||
is_right_joinable,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -34,6 +50,10 @@ where
|
|||
/// Returns a reference to the normalised form of the token.
pub fn normalised(&self) -> &T { &self.normalised }
|
||||
|
||||
/// Returns the length of the original text, counted in `char`s rather than
/// bytes.
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
|
||||
/// Returns the token's `is_left_joinable` flag.
pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
|
||||
|
||||
/// Returns the token's `is_right_joinable` flag.
pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
|
|
|
|||
|
|
@ -7,14 +7,59 @@ left: |
|
|||
right: |
|
||||
Hello there!
|
||||
|
||||
|
||||
Best,
|
||||
Andras
|
||||
|
||||
expected: |
|
||||
Hello there!
|
||||
|
||||
How are you?
|
||||
|
||||
Best,
|
||||
Andras
|
||||
|
||||
|
||||
How are you?
|
||||
|
||||
---
|
||||
|
||||
parent: |
|
||||
- my list
|
||||
- 2nd item
|
||||
- 3rd item
|
||||
|
||||
left: |
|
||||
- my list
|
||||
- 2nd item
|
||||
- nested list
|
||||
- very nested list
|
||||
- 3rd item
|
||||
|
||||
right: |
|
||||
- my list
|
||||
- nested list
|
||||
- 2nd item
|
||||
- 3rd item
|
||||
- another nested list
|
||||
|
||||
expected: |
|
||||
- my list
|
||||
- nested list
|
||||
- 2nd item
|
||||
- nested list
|
||||
- very nested list
|
||||
- 3rd item
|
||||
- another nested list
|
||||
|
||||
---
|
||||
|
||||
parent: |
|
||||
a
|
||||
a
|
||||
left: |
|
||||
a
|
||||
a
|
||||
right: |
|
||||
a
|
||||
a
|
||||
expected: |
|
||||
a
|
||||
a
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue