Add left/right joinability for tokens
This commit is contained in:
parent
b0c6c082a1
commit
b230d34b88
13 changed files with 313 additions and 75 deletions
|
|
@ -5,11 +5,21 @@ snapshot_kind: text
|
|||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "what?",
|
||||
original: " what?",
|
||||
normalised: " what?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "",
|
||||
normalised: "what?",
|
||||
original: "what?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -4,20 +4,52 @@ expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
|||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " hello,",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: " hello,",
|
||||
original: "hello,",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " \nwhere",
|
||||
original: " \n",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: " \nwhere",
|
||||
original: "where",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " are",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: " are",
|
||||
original: "are",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " you?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: " you?",
|
||||
original: "you?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
},
|
||||
Token {
|
||||
normalised: " \n",
|
||||
original: " \n",
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
},
|
||||
]
|
||||
|
|
@ -7,9 +7,19 @@ snapshot_kind: text
|
|||
Token {
|
||||
normalised: "Hi",
|
||||
original: "Hi",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " there!",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "there!",
|
||||
original: " there!",
|
||||
original: "there!",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -3,29 +3,45 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
/// A token is a string that has been normalised in some way.
|
||||
/// The normalised form is used for comparison, while the original form is used
|
||||
/// for applying Operations.
|
||||
/// for applying `Operation`-s.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
normalised: T,
|
||||
/// The normalised form of the token used deriving the diff.
|
||||
pub normalised: T,
|
||||
|
||||
/// The original string, that should be inserted or deleted in the document.
|
||||
original: String,
|
||||
|
||||
/// Whether the token is joinable with the previous token.
|
||||
is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is joinable with the next token.
|
||||
is_right_joinable: bool,
|
||||
}
|
||||
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) }
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn new(normalised: T, original: String) -> Self {
|
||||
pub fn new(
|
||||
normalised: T,
|
||||
original: String,
|
||||
is_left_joinable: bool,
|
||||
is_right_joinable: bool,
|
||||
) -> Self {
|
||||
Token {
|
||||
normalised,
|
||||
original,
|
||||
is_left_joinable,
|
||||
is_right_joinable,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -34,6 +50,10 @@ where
|
|||
pub fn normalised(&self) -> &T { &self.normalised }
|
||||
|
||||
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
|
||||
pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }
|
||||
|
||||
pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue