Remove the exponential API

This commit is contained in:
Andras Schmelczer 2025-06-29 19:03:55 +01:00
parent 0448e30dd9
commit 4fda83fe17
8 changed files with 248 additions and 150 deletions

View file

@ -1,7 +1,44 @@
mod word_tokenizer;
use std::ops::Deref;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use token::Token;
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
pub mod token;
pub mod word_tokenizer;
/// A trait for tokenizers that take a string and return a list of tokens.
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(feature = "wasm")]
pub enum BuiltinTokenizer {
Character = "Character",
Word = "Word",
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(not(feature = "wasm"))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BuiltinTokenizer {
Character,
Word,
}
impl Deref for BuiltinTokenizer {
type Target = Tokenizer<String>;
fn deref(&self) -> &Self::Target {
match self {
BuiltinTokenizer::Character => todo!(),
BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
#[cfg(feature = "wasm")]
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),
}
}
}