// Source listing metadata: 53 lines, 1.5 KiB, Rust.
mod character_tokenizer;
|
|
mod line_tokenizer;
|
|
mod markdown_tokenizer;
|
|
mod word_tokenizer;
|
|
|
|
use std::ops::Deref;
|
|
|
|
#[cfg(feature = "serde")]
|
|
use serde::{Deserialize, Serialize};
|
|
use token::Token;
|
|
#[cfg(feature = "wasm")]
|
|
use wasm_bindgen::prelude::*;
|
|
|
|
pub mod token;
|
|
|
|
/// Type alias for tokenizer functions that split a string into tokens
|
|
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
|
|
|
/// Selector for one of the built-in tokenizers (`wasm` feature build).
///
/// This definition is compiled only when the `wasm` feature is enabled and is
/// annotated with `wasm_bindgen`. Every variant carries a string value equal
/// to its own name (`"Character"`, `"Line"`, `"Markdown"`, `"Word"`).
///
/// A definition with the same variant names, but without string values, is
/// used when the `wasm` feature is disabled.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(feature = "wasm")]
pub enum BuiltinTokenizer {
    Character = "Character",
    Line = "Line",
    Markdown = "Markdown",
    Word = "Word",
}
|
|
|
|
/// Selector for one of the built-in tokenizers (build without the `wasm`
/// feature).
///
/// The type is a plain field-less enum that is `Copy` and comparable with
/// `==`. With the `serde` feature enabled it additionally derives
/// `Serialize` and `Deserialize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(not(feature = "wasm"))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BuiltinTokenizer {
    /// Selects the tokenizer from the `character_tokenizer` module.
    Character,
    /// Selects the tokenizer from the `line_tokenizer` module.
    Line,
    /// Selects the tokenizer from the `markdown_tokenizer` module.
    Markdown,
    /// Selects the tokenizer from the `word_tokenizer` module.
    Word,
}
|
|
|
|
impl Deref for BuiltinTokenizer {
|
|
type Target = Tokenizer<String>;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
match self {
|
|
BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
|
|
BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
|
|
BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
|
|
BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
|
|
#[cfg(feature = "wasm")]
|
|
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),
|
|
}
|
|
}
|
|
}
|