reconcile/src/tokenizer.rs

53 lines
1.5 KiB
Rust

mod character_tokenizer;
mod line_tokenizer;
mod markdown_tokenizer;
mod word_tokenizer;
use std::ops::Deref;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use token::Token;
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
pub mod token;
/// Type alias for tokenizer functions that split a string into tokens.
///
/// A tokenizer is any callable that receives the input text as `&str` and
/// returns the resulting `Vec<Token<T>>`. Because the alias names an unsized
/// `dyn Fn` trait-object type, it has to be used behind a pointer, for example
/// `&Tokenizer<T>`.
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
/// Selects one of the tokenizers that ship with this module (`wasm` build).
///
/// This definition is compiled only when the `wasm` feature is enabled and is
/// passed through `wasm_bindgen`. Every variant carries a string value equal
/// to its own name. When the `serde` feature is enabled the enum additionally
/// derives `Serialize` and `Deserialize`.
///
/// Dereferencing a value yields the tokenizer function of the matching
/// submodule (see the `Deref` implementation in this file).
// NOTE(review): the string values are presumably what JavaScript callers see
// for each variant; confirm against the wasm-bindgen version in use.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(feature = "wasm")]
pub enum BuiltinTokenizer {
/// Resolves to `character_tokenizer::character_tokenizer`.
Character = "Character",
/// Resolves to `line_tokenizer::line_tokenizer`.
Line = "Line",
/// Resolves to `markdown_tokenizer::markdown_tokenizer`.
Markdown = "Markdown",
/// Resolves to `word_tokenizer::word_tokenizer`.
Word = "Word",
}
/// Selects one of the tokenizers that ship with this module (non-`wasm` build).
///
/// This definition is compiled only when the `wasm` feature is disabled; it
/// has the same four variants as the `wasm` definition, but as plain unit
/// variants without string values. When the `serde` feature is enabled the
/// enum additionally derives `Serialize` and `Deserialize`.
///
/// Dereferencing a value yields the tokenizer function of the matching
/// submodule (see the `Deref` implementation in this file).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(not(feature = "wasm"))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BuiltinTokenizer {
/// Resolves to `character_tokenizer::character_tokenizer`.
Character,
/// Resolves to `line_tokenizer::line_tokenizer`.
Line,
/// Resolves to `markdown_tokenizer::markdown_tokenizer`.
Markdown,
/// Resolves to `word_tokenizer::word_tokenizer`.
Word,
}
/// Lets a [`BuiltinTokenizer`] stand in wherever a `&Tokenizer<String>` is
/// expected by mapping every variant to the tokenizer function of its
/// submodule.
impl Deref for BuiltinTokenizer {
    type Target = Tokenizer<String>;

    /// Returns the tokenizer function backing the selected variant.
    ///
    /// # Panics
    ///
    /// With the `wasm` feature enabled, panics with
    /// `"Unexpected tokenizer type"` when the value is the `__Invalid` variant.
    fn deref(&self) -> &Self::Target {
        // The enum is `Copy`, so it can be matched by value; each arm borrows
        // a function item, which coerces to the `dyn Fn` target type.
        match *self {
            Self::Character => &character_tokenizer::character_tokenizer,
            Self::Line => &line_tokenizer::line_tokenizer,
            Self::Markdown => &markdown_tokenizer::markdown_tokenizer,
            Self::Word => &word_tokenizer::word_tokenizer,
            // NOTE(review): `__Invalid` is not declared in this file; it is
            // presumably generated by `wasm_bindgen` for string-valued enums.
            // Confirm against the pinned wasm-bindgen version.
            #[cfg(feature = "wasm")]
            Self::__Invalid => panic!("Unexpected tokenizer type"),
        }
    }
}