Clean up API and small fixes

2026-03-10 21:35:09 +00:00 · 2026-03-10 21:35:09 +00:00 · a80da338e4
commit a80da338e4
parent 665cdb2881
13 changed files with 56 additions and 36 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -100,11 +100,11 @@
 //! let parent = "Hello world";
 //! let left = TextWithCursors::new(
 //!     "Hello beautiful world".to_string(),
-//!     vec![CursorPosition { id: 1, char_index: 6 }] // After "Hello "
+//!     vec![CursorPosition::new(1, 6)] // After "Hello "
 //! );
 //! let right = TextWithCursors::new(
 //!     "Hi world".to_string(),
-//!     vec![CursorPosition { id: 2, char_index: 0 }] // At the beginning
+//!     vec![CursorPosition::new(2, 0)] // At the beginning
 //! );
 //!
 //! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word);
@ -173,7 +173,7 @@
 //!     &changes.into()
 //! );
 //!
-//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
+//! let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
 //! assert_eq!(
 //!     serialized,
 //!     concat!(
--- a/src/operation_transformation.rs
+++ b/src/operation_transformation.rs
@ -156,7 +156,7 @@ mod test {
                    .unwrap()
                    .chars()
                    .skip(range.start)
-                    .take(range.end)
+                    .take(range.len())
                    .collect::<String>()
            })
            .collect::<Vec<_>>();
--- a/src/operation_transformation/diff_error.rs
+++ b/src/operation_transformation/diff_error.rs
@ -16,4 +16,11 @@ pub enum DiffError {
        /// The number of characters available from the position
        available: usize,
    },
+
+    /// A character count was too large to represent as i64
+    #[error("Integer overflow: value {value} cannot be represented as i64")]
+    IntegerOverflow {
+        /// The value that caused the overflow
+        value: usize,
+    },
 }
--- a/src/raw_operation.rs
+++ b/src/raw_operation.rs
@ -22,7 +22,7 @@ where
 {
    pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> { myers_diff(left, right) }

-    pub fn tokens(&self) -> &Vec<Token<T>> {
+    pub fn tokens(&self) -> &[Token<T>] {
        match self {
            RawOperation::Insert(tokens)
            | RawOperation::Delete(tokens)
@ -34,7 +34,9 @@ where
        self.tokens().iter().map(Token::get_original_length).sum()
    }

-    pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
+    pub fn get_original_text(&self) -> String {
+        self.tokens().iter().map(Token::original).collect()
+    }

    pub fn is_left_joinable(&self) -> bool {
        let first_token = self.tokens().first();
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -1,5 +1,6 @@
 mod character_tokenizer;
 mod line_tokenizer;
+mod markdown_tokenizer;
 mod word_tokenizer;

 use std::ops::Deref;
@ -22,6 +23,7 @@ pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
 pub enum BuiltinTokenizer {
    Character = "Character",
    Line = "Line",
+    Markdown = "Markdown",
    Word = "Word",
 }

@ -31,6 +33,7 @@ pub enum BuiltinTokenizer {
 pub enum BuiltinTokenizer {
    Character,
    Line,
+    Markdown,
    Word,
 }

@ -41,6 +44,7 @@ impl Deref for BuiltinTokenizer {
        match self {
            BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
            BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
+            BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
            BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
            #[cfg(feature = "wasm")]
            BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),
--- a/src/types/cursor_position.rs
+++ b/src/types/cursor_position.rs
@ -10,8 +10,8 @@ use wasm_bindgen::prelude::*;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct CursorPosition {
-    pub id: usize,
-    pub char_index: usize,
+    pub(crate) id: usize,
+    pub(crate) char_index: usize,
 }

 #[cfg_attr(feature = "wasm", wasm_bindgen)]
--- a/src/types/text_with_cursors.rs
+++ b/src/types/text_with_cursors.rs
@ -39,6 +39,11 @@ impl TextWithCursors {
    pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
 }

+impl TextWithCursors {
+    #[must_use]
+    pub fn text_ref(&self) -> &str { &self.text }
+}
+
 impl<'a> From<&'a str> for TextWithCursors {
    fn from(text: &'a str) -> Self {
        Self {
--- a/src/utils/myers_diff.rs
+++ b/src/utils/myers_diff.rs
@ -90,7 +90,7 @@ impl V {
        let offset = isize::try_from(max_d).expect("max_d must fit in isize");
        Self {
            offset,
-            v: vec![0; 2 * max_d],
+            v: vec![0; 2 * max_d + 1],
        }
    }

--- a/src/utils/string_builder.rs
+++ b/src/utils/string_builder.rs
@ -1,10 +1,10 @@
-use std::{fmt, iter::Iterator};
+use std::{fmt, str::Chars};

 /// A helper for building a string sequentially from an original string via
 /// insertions, deletions, and copies. All operations use character counts,
 /// safe for UTF-8. Methods must be called in-order.
 pub struct StringBuilder<'a> {
-    original: Box<dyn Iterator<Item = char> + 'a>,
+    original: Chars<'a>,
    buffer: String,

    #[cfg(debug_assertions)]
@ -26,7 +26,7 @@ impl fmt::Debug for StringBuilder<'_> {
 impl StringBuilder<'_> {
    pub fn new(original: &str) -> StringBuilder<'_> {
        StringBuilder {
-            original: Box::new(original.chars()),
+            original: original.chars(),
            buffer: String::with_capacity(original.len()),

            #[cfg(debug_assertions)]
--- a/src/wasm.rs
+++ b/src/wasm.rs
@ -5,9 +5,6 @@ use wasm_bindgen::prelude::*;

 use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};

-#[global_allocator]
-static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
-
 /// WASM wrapper around `crate::reconcile` for merging text
 #[wasm_bindgen(js_name = reconcile)]
 #[must_use]
@ -34,11 +31,11 @@ pub fn reconcile_with_history(
    set_panic_hook();

    let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
-    let text_with_cursors = reconciled.apply();
+    let (text_with_cursors, history) = reconciled.apply_with_all();

    TextWithCursorsAndHistory {
        text_with_cursors,
-        history: reconciled.apply_with_history(),
+        history,
    }
 }

@ -81,17 +78,23 @@ pub fn generic_reconcile(

 /// WASM wrapper around getting a compact diff representation of two texts as a
 /// list of numbers and strings
+///
+/// # Errors
+///
+/// Returns a JS error if integer overflow occurs during diff computation.
 #[wasm_bindgen(js_name = diff)]
-#[must_use]
-pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> {
+pub fn diff(
+    parent: &str,
+    changed: &TextWithCursors,
+    tokenizer: BuiltinTokenizer,
+) -> Result<Vec<JsValue>, JsValue> {
    set_panic_hook();

    let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
    edited_text
        .to_diff()
-        .into_iter()
-        .map(std::convert::Into::into)
-        .collect()
+        .map(|diff| diff.into_iter().map(std::convert::Into::into).collect())
+        .map_err(|e| JsValue::from_str(&e.to_string()))
 }

 /// Inverse of `diff`, applies a compact diff representation to a parent text
--- a/tests/example_document.rs
+++ b/tests/example_document.rs
@ -65,9 +65,9 @@ impl ExampleDocument {
        let mut result = merged.text();
        for (i, cursor) in merged.cursors().iter().enumerate() {
            assert!(
-                cursor.char_index <= result.len(), // equals in case of insert at the end
+                cursor.char_index() <= result.len(), // equals in case of insert at the end
                "Cursor index out of bounds: {} > {} when testing for '{}.'",
-                cursor.char_index,
+                cursor.char_index(),
                result.len(),
                result
            );
@ -75,7 +75,7 @@ impl ExampleDocument {
            result.insert(
                result
                    .char_indices()
-                    .nth(cursor.char_index + i)
+                    .nth(cursor.char_index() + i)
                    .map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert
                                                                                  * in byte index */
                '|',
@ -94,10 +94,7 @@ impl ExampleDocument {
        let mut cursors = Vec::new();
        for (i, c) in text.chars().enumerate() {
            if c == '|' {
-                cursors.push(CursorPosition {
-                    id: 0,
-                    char_index: i - cursors.len(),
-                });
+                cursors.push(CursorPosition::new(0, i - cursors.len()));
            }
        }
        cursors
--- a/tests/test.rs
+++ b/tests/test.rs
@ -49,12 +49,14 @@ fn test_document_one_way_with_serialisation() {
            &*BuiltinTokenizer::Word,
        );

-        let serialised_left =
-            serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap())
-                .unwrap();
-        let serialised_right =
-            serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap())
-                .unwrap();
+        let serialised_left = serde_yaml::from_str(
+            &serde_yaml::to_string(&left_operations.to_diff().unwrap()).unwrap(),
+        )
+        .unwrap();
+        let serialised_right = serde_yaml::from_str(
+            &serde_yaml::to_string(&right_operations.to_diff().unwrap()).unwrap(),
+        )
+        .unwrap();

        let restored_left_operations =
            EditedText::from_diff(&parent, serialised_left, &*BuiltinTokenizer::Word).unwrap();
--- a/tests/wasm.rs
+++ b/tests/wasm.rs
@ -60,7 +60,7 @@ fn test_diff() {
    let parent = "hello ";
    let changed = "world";

-    let result = diff(parent, &changed.into(), BuiltinTokenizer::Word);
+    let result = diff(parent, &changed.into(), BuiltinTokenizer::Word).unwrap();

    assert_eq!(result.len(), 2);
    let first: i64 = result[0].clone().try_into().unwrap();