Clean up API and small fixes

This commit is contained in:
Andras Schmelczer 2026-03-10 21:35:09 +00:00
parent 665cdb2881
commit a80da338e4
13 changed files with 56 additions and 36 deletions

View file

@ -100,11 +100,11 @@
//! let parent = "Hello world"; //! let parent = "Hello world";
//! let left = TextWithCursors::new( //! let left = TextWithCursors::new(
//! "Hello beautiful world".to_string(), //! "Hello beautiful world".to_string(),
//! vec![CursorPosition { id: 1, char_index: 6 }] // After "Hello " //! vec![CursorPosition::new(1, 6)] // After "Hello "
//! ); //! );
//! let right = TextWithCursors::new( //! let right = TextWithCursors::new(
//! "Hi world".to_string(), //! "Hi world".to_string(),
//! vec![CursorPosition { id: 2, char_index: 0 }] // At the beginning //! vec![CursorPosition::new(2, 0)] // At the beginning
//! ); //! );
//! //!
//! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word); //! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word);
@ -173,7 +173,7 @@
//! &changes.into() //! &changes.into()
//! ); //! );
//! //!
//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap(); //! let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
//! assert_eq!( //! assert_eq!(
//! serialized, //! serialized,
//! concat!( //! concat!(

View file

@ -156,7 +156,7 @@ mod test {
.unwrap() .unwrap()
.chars() .chars()
.skip(range.start) .skip(range.start)
.take(range.end) .take(range.len())
.collect::<String>() .collect::<String>()
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();

View file

@ -16,4 +16,11 @@ pub enum DiffError {
/// The number of characters available from the position /// The number of characters available from the position
available: usize, available: usize,
}, },
/// A character count was too large to represent as i64
#[error("Integer overflow: value {value} cannot be represented as i64")]
IntegerOverflow {
/// The value that caused the overflow
value: usize,
},
} }

View file

@ -22,7 +22,7 @@ where
{ {
pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> { myers_diff(left, right) } pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> { myers_diff(left, right) }
pub fn tokens(&self) -> &Vec<Token<T>> { pub fn tokens(&self) -> &[Token<T>] {
match self { match self {
RawOperation::Insert(tokens) RawOperation::Insert(tokens)
| RawOperation::Delete(tokens) | RawOperation::Delete(tokens)
@ -34,7 +34,9 @@ where
self.tokens().iter().map(Token::get_original_length).sum() self.tokens().iter().map(Token::get_original_length).sum()
} }
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() } pub fn get_original_text(&self) -> String {
self.tokens().iter().map(Token::original).collect()
}
pub fn is_left_joinable(&self) -> bool { pub fn is_left_joinable(&self) -> bool {
let first_token = self.tokens().first(); let first_token = self.tokens().first();

View file

@ -1,5 +1,6 @@
mod character_tokenizer; mod character_tokenizer;
mod line_tokenizer; mod line_tokenizer;
mod markdown_tokenizer;
mod word_tokenizer; mod word_tokenizer;
use std::ops::Deref; use std::ops::Deref;
@ -22,6 +23,7 @@ pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
pub enum BuiltinTokenizer { pub enum BuiltinTokenizer {
Character = "Character", Character = "Character",
Line = "Line", Line = "Line",
Markdown = "Markdown",
Word = "Word", Word = "Word",
} }
@ -31,6 +33,7 @@ pub enum BuiltinTokenizer {
pub enum BuiltinTokenizer { pub enum BuiltinTokenizer {
Character, Character,
Line, Line,
Markdown,
Word, Word,
} }
@ -41,6 +44,7 @@ impl Deref for BuiltinTokenizer {
match self { match self {
BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer, BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer, BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer, BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
#[cfg(feature = "wasm")] #[cfg(feature = "wasm")]
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"), BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),

View file

@ -10,8 +10,8 @@ use wasm_bindgen::prelude::*;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)] #[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition { pub struct CursorPosition {
pub id: usize, pub(crate) id: usize,
pub char_index: usize, pub(crate) char_index: usize,
} }
#[cfg_attr(feature = "wasm", wasm_bindgen)] #[cfg_attr(feature = "wasm", wasm_bindgen)]

View file

@ -39,6 +39,11 @@ impl TextWithCursors {
pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() } pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
} }
impl TextWithCursors {
#[must_use]
pub fn text_ref(&self) -> &str { &self.text }
}
impl<'a> From<&'a str> for TextWithCursors { impl<'a> From<&'a str> for TextWithCursors {
fn from(text: &'a str) -> Self { fn from(text: &'a str) -> Self {
Self { Self {

View file

@ -90,7 +90,7 @@ impl V {
let offset = isize::try_from(max_d).expect("max_d must fit in isize"); let offset = isize::try_from(max_d).expect("max_d must fit in isize");
Self { Self {
offset, offset,
v: vec![0; 2 * max_d], v: vec![0; 2 * max_d + 1],
} }
} }

View file

@ -1,10 +1,10 @@
use std::{fmt, iter::Iterator}; use std::{fmt, str::Chars};
/// A helper for building a string sequentially from an original string via /// A helper for building a string sequentially from an original string via
/// insertions, deletions, and copies. All operations use character counts, /// insertions, deletions, and copies. All operations use character counts,
/// safe for UTF-8. Methods must be called in-order. /// safe for UTF-8. Methods must be called in-order.
pub struct StringBuilder<'a> { pub struct StringBuilder<'a> {
original: Box<dyn Iterator<Item = char> + 'a>, original: Chars<'a>,
buffer: String, buffer: String,
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@ -26,7 +26,7 @@ impl fmt::Debug for StringBuilder<'_> {
impl StringBuilder<'_> { impl StringBuilder<'_> {
pub fn new(original: &str) -> StringBuilder<'_> { pub fn new(original: &str) -> StringBuilder<'_> {
StringBuilder { StringBuilder {
original: Box::new(original.chars()), original: original.chars(),
buffer: String::with_capacity(original.len()), buffer: String::with_capacity(original.len()),
#[cfg(debug_assertions)] #[cfg(debug_assertions)]

View file

@ -5,9 +5,6 @@ use wasm_bindgen::prelude::*;
use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors}; use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
/// WASM wrapper around `crate::reconcile` for merging text /// WASM wrapper around `crate::reconcile` for merging text
#[wasm_bindgen(js_name = reconcile)] #[wasm_bindgen(js_name = reconcile)]
#[must_use] #[must_use]
@ -34,11 +31,11 @@ pub fn reconcile_with_history(
set_panic_hook(); set_panic_hook();
let reconciled = crate::reconcile(parent, left, right, &*tokenizer); let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
let text_with_cursors = reconciled.apply(); let (text_with_cursors, history) = reconciled.apply_with_all();
TextWithCursorsAndHistory { TextWithCursorsAndHistory {
text_with_cursors, text_with_cursors,
history: reconciled.apply_with_history(), history,
} }
} }
@ -81,17 +78,23 @@ pub fn generic_reconcile(
/// WASM wrapper around getting a compact diff representation of two texts as a /// WASM wrapper around getting a compact diff representation of two texts as a
/// list of numbers and strings /// list of numbers and strings
///
/// # Errors
///
/// Returns a JS error if integer overflow occurs during diff computation.
#[wasm_bindgen(js_name = diff)] #[wasm_bindgen(js_name = diff)]
#[must_use] pub fn diff(
pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> { parent: &str,
changed: &TextWithCursors,
tokenizer: BuiltinTokenizer,
) -> Result<Vec<JsValue>, JsValue> {
set_panic_hook(); set_panic_hook();
let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer); let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
edited_text edited_text
.to_diff() .to_diff()
.into_iter() .map(|diff| diff.into_iter().map(std::convert::Into::into).collect())
.map(std::convert::Into::into) .map_err(|e| JsValue::from_str(&e.to_string()))
.collect()
} }
/// Inverse of `diff`, applies a compact diff representation to a parent text /// Inverse of `diff`, applies a compact diff representation to a parent text

View file

@ -65,9 +65,9 @@ impl ExampleDocument {
let mut result = merged.text(); let mut result = merged.text();
for (i, cursor) in merged.cursors().iter().enumerate() { for (i, cursor) in merged.cursors().iter().enumerate() {
assert!( assert!(
cursor.char_index <= result.len(), // equals in case of insert at the end cursor.char_index() <= result.len(), // equals in case of insert at the end
"Cursor index out of bounds: {} > {} when testing for '{}.'", "Cursor index out of bounds: {} > {} when testing for '{}.'",
cursor.char_index, cursor.char_index(),
result.len(), result.len(),
result result
); );
@ -75,7 +75,7 @@ impl ExampleDocument {
result.insert( result.insert(
result result
.char_indices() .char_indices()
.nth(cursor.char_index + i) .nth(cursor.char_index() + i)
.map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert .map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert
* in byte index */ * in byte index */
'|', '|',
@ -94,10 +94,7 @@ impl ExampleDocument {
let mut cursors = Vec::new(); let mut cursors = Vec::new();
for (i, c) in text.chars().enumerate() { for (i, c) in text.chars().enumerate() {
if c == '|' { if c == '|' {
cursors.push(CursorPosition { cursors.push(CursorPosition::new(0, i - cursors.len()));
id: 0,
char_index: i - cursors.len(),
});
} }
} }
cursors cursors

View file

@ -49,11 +49,13 @@ fn test_document_one_way_with_serialisation() {
&*BuiltinTokenizer::Word, &*BuiltinTokenizer::Word,
); );
let serialised_left = let serialised_left = serde_yaml::from_str(
serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap()) &serde_yaml::to_string(&left_operations.to_diff().unwrap()).unwrap(),
)
.unwrap(); .unwrap();
let serialised_right = let serialised_right = serde_yaml::from_str(
serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap()) &serde_yaml::to_string(&right_operations.to_diff().unwrap()).unwrap(),
)
.unwrap(); .unwrap();
let restored_left_operations = let restored_left_operations =

View file

@ -60,7 +60,7 @@ fn test_diff() {
let parent = "hello "; let parent = "hello ";
let changed = "world"; let changed = "world";
let result = diff(parent, &changed.into(), BuiltinTokenizer::Word); let result = diff(parent, &changed.into(), BuiltinTokenizer::Word).unwrap();
assert_eq!(result.len(), 2); assert_eq!(result.len(), 2);
let first: i64 = result[0].clone().try_into().unwrap(); let first: i64 = result[0].clone().try_into().unwrap();