Add tokenizer

This commit is contained in:
Andras Schmelczer 2024-11-24 22:32:06 +00:00
parent e910d9c5f4
commit 331e264399
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
7 changed files with 119 additions and 33 deletions

View file

@ -1,14 +1,21 @@
use crate::tokenizer::token::Token;
use std::hash::Hash;
/// An operation over a list of tokens, tagged as `Insert`, `Delete`, or `Equal`.
///
/// Each variant carries the `Vec` of tokens it applies to. In the generic form the token
/// payload type `T` must implement `PartialEq + Hash + Clone`.
#[derive(Debug, Clone, PartialEq)]
pub enum RawOperation {
Insert(Vec<Token>),
Delete(Vec<Token>),
Equal(Vec<Token>),
pub enum RawOperation<T>
where
T: PartialEq + Hash + Clone,
{
Insert(Vec<Token<T>>),
Delete(Vec<Token<T>>),
Equal(Vec<Token<T>>),
}
impl RawOperation {
pub fn tokens(&self) -> &Vec<Token> {
impl<T> RawOperation<T>
where
T: PartialEq + Hash + Clone,
{
pub fn tokens(&self) -> &Vec<Token<T>> {
match self {
RawOperation::Insert(tokens) => tokens,
RawOperation::Delete(tokens) => tokens,
@ -17,13 +24,28 @@ impl RawOperation {
}
/// Returns the sum of the original lengths of all tokens held by this operation.
pub fn original_text_length(&self) -> usize {
self.tokens()
.iter()
.map(|t| t.original.chars().count())
.sum()
// NOTE(review): the form above measured length in `char`s of `t.original`; whether
// `get_original_length` uses the same unit is not visible here — confirm.
self.tokens().iter().map(|t| t.get_original_length()).sum()
}
/// Consumes the operation and concatenates the original text of each of its tokens, in
/// order, into a single `String`.
// NOTE(review): the body only borrows through `self.tokens()`, so taking `self` by value
// does not look required by the visible code — confirm whether callers rely on it.
pub fn get_original_text(self) -> String {
self.tokens().iter().map(|t| t.original.clone()).collect()
self.tokens().iter().map(|t| t.original()).collect()
}
/// Merges `other` into this operation and returns the combined operation.
///
/// Both operations are consumed. Only operations of the same variant can be merged: the
/// tokens of `other` are appended after the tokens of `self`, and the result keeps that
/// variant. If the two operations are of different variants, returns `None`.
pub fn extend(self, other: RawOperation<T>) -> Option<RawOperation<T>> {
    match (self, other) {
        // Append in place rather than chaining both vectors into a freshly collected one.
        (RawOperation::Insert(mut tokens), RawOperation::Insert(more)) => {
            tokens.extend(more);
            Some(RawOperation::Insert(tokens))
        }
        (RawOperation::Delete(mut tokens), RawOperation::Delete(more)) => {
            tokens.extend(more);
            Some(RawOperation::Delete(tokens))
        }
        (RawOperation::Equal(mut tokens), RawOperation::Equal(more)) => {
            tokens.extend(more);
            Some(RawOperation::Equal(tokens))
        }
        // Operations of different variants cannot be merged.
        _ => None,
    }
}
}