Improve docs and compare with alternatives

2026-03-10 20:29:35 +00:00 · 2026-03-10 20:29:35 +00:00 · 3d382ad741
commit 3d382ad741
parent 5962feb90a
14 changed files with 106 additions and 69 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -59,7 +59,7 @@
 //!
 //! For specialised use cases, such as structured languages, custom
 //! tokenisation logic can be implemented by providing a function with the
-//! signature `Fn(&str) -> Vec<Token<String>>`::
+//! signature `Fn(&str) -> Vec<Token<String>>`:
 //!
 //! ```
 //! use reconcile_text::{reconcile, Token, BuiltinTokenizer};
@ -151,10 +151,11 @@
 //!     ]
 //! );
 //! ```
+//!
 //! ## Efficiently serialize changes
 //!
 //! The edits can be serialized into a compact representation without the full
-//! original text, making the size only depends on the changes made.
+//! original text, making the size depend only on the changes made.
 //!
 //! ```rust
 //! # #[cfg(feature = "serde")]
--- a/src/operation_transformation/edited_text.rs
+++ b/src/operation_transformation/edited_text.rs
@ -18,18 +18,16 @@ use crate::{
    utils::string_builder::StringBuilder,
 };

-/// A text document and a sequence of operations that can be applied to the text
-/// document. `EditedText` supports merging two sequences of operations using
-/// the principles of Operational Transformation.
+/// A text document with a sequence of operations derived from diffing it
+/// against an updated version. Supports merging two `EditedText` instances
+/// (from the same original) via Operational Transformation.
 ///
-/// It's mainly created through the `from_strings` method, then merged with
-/// another `EditedText` derived from the same original text and then applied to
-/// the original text to get the reconciled text of concurrent edits.
+/// Created via `from_strings`, `from_strings_with_tokenizer`, or `from_diff`,
+/// then merged with another `EditedText` and applied to get the reconciled
+/// text.
 ///
-/// In addition to text and operations, it also keeps track of cursor positions
-/// in the original text. The cursor positions are updated when the operations
-/// are applied, so that the cursor positions can be used to restore the
-/// cursor positions in the updated text.
+/// Also tracks cursor positions from the updated text, repositioning them
+/// when operations are applied.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct EditedText<'a, T>
@ -43,12 +41,8 @@ where
 }

 impl<'a> EditedText<'a, String> {
-    /// Create an `EditedText` from the given original (old) and updated (new)
-    /// strings. The returned `EditedText` represents the changes from the
-    /// original to the updated text. When the return value is applied to
-    /// the original text, it will result in the updated text. The default
-    /// word tokenizer is used to tokenize the text which splits the text on
-    /// whitespaces.
+    /// Create an `EditedText` from the given original and updated strings.
+    /// Uses the default word tokenizer (splits on word boundaries).
    #[must_use]
    pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self {
        Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word)
@ -59,11 +53,8 @@ impl<'a, T> EditedText<'a, T>
 where
    T: PartialEq + Clone + Debug,
 {
-    /// Create an `EditedText` from the given original (old) and updated (new)
-    /// strings. The returned `EditedText` represents the changes from the
-    /// original to the updated text. When the return value is applied to
-    /// the original text, it will result in the updated text. The tokenizer
-    /// function is used to tokenize the text.
+    /// Create an `EditedText` from the given original and updated strings
+    /// using the provided tokenizer.
    pub fn from_strings_with_tokenizer(
        original: &'a str,
        updated: &TextWithCursors,
@ -110,7 +101,7 @@ where
    ///
    /// # Panics
    ///
-    /// Panics if there's an integer overflow (in i64) when calculating new
+    /// Panics if there's an integer overflow (in isize) when calculating new
    /// cursor positions.
    #[must_use]
    #[allow(clippy::too_many_lines)]
@ -280,7 +271,7 @@ where
    /// Apply the operations to the text and return the resulting text in chunks
    /// together with the provenance describing where each chunk came from.
    ///
-    /// The result includes deleted spans as well.
+    /// Returns all spans, including deleted ones absent from the merged text.
    ///
    /// ```
    ///  use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile};
@ -422,7 +413,7 @@ where
        result
    }

-    /// Deserialize an `EditedText` from a change list and the original text.
+    /// Reconstruct an `EditedText` from a diff and the original text.
    ///
    /// # Errors
    ///
--- a/src/operation_transformation/operation.rs
+++ b/src/operation_transformation/operation.rs
@ -46,9 +46,8 @@ impl<T> Operation<T>
 where
    T: PartialEq + Clone + Debug,
 {
-    /// Creates an equal operation with the given index.
-    /// This operation is used to indicate that the text at the given index
-    /// is unchanged.
+    /// Creates an equal (retain) operation starting at the given character
+    /// offset in the original text.
    pub fn create_equal(order: usize, length: usize) -> Self {
        Operation::Equal {
            order,
@ -69,13 +68,14 @@ where
        }
    }

-    /// Creates an insert operation with the given index and text.
+    /// Creates an insert operation at the given character offset with the
+    /// given tokens.
    pub fn create_insert(order: usize, text: Vec<Token<T>>) -> Self {
        Operation::Insert { order, text }
    }

-    /// Creates a delete operation with the given index and number of
-    /// to-be-deleted characters.
+    /// Creates a delete operation at the given character offset for the
+    /// specified number of characters.
    pub fn create_delete(order: usize, deleted_character_count: usize) -> Self {
        Operation::Delete {
            order,
@ -179,8 +179,8 @@ where
        builder
    }

-    /// Returns the number of affected characters. It is always greater than 0
-    /// because empty operations cannot be created.
+    /// Returns the number of affected characters. May be 0 after
+    /// `merge_operations`.
    pub fn len(&self) -> usize {
        match self {
            Operation::Equal { length, .. } => *length,
@ -192,10 +192,9 @@ where
        }
    }

-    /// Merges the operation with the given context, producing a new operation
-    /// and updating the context. This implements a comples FSM that handles
-    /// the merging of operations in a way that is consistent with the text.
-    /// The contexts are updated in-place.
+    /// Adjusts this operation based on `previous_operation` from the other side
+    /// to avoid duplicating or conflicting changes. Updates
+    /// `previous_operation` in-place.
    #[allow(clippy::too_many_lines)]
    pub fn merge_operations(self, previous_operation: &mut Option<Self>) -> Operation<T> {
        let operation = self;
--- a/src/raw_operation.rs
+++ b/src/raw_operation.rs
@ -2,9 +2,9 @@ use std::fmt::Debug;

 use crate::{tokenizer::token::Token, utils::myers_diff::myers_diff};

-/// Text editing operation containing the to-be-changed `Tokens`-s.
+/// Text editing operation containing the affected tokens.
 ///
-/// `RawOperations` can be joined together when the underlying tokens
+/// `RawOperation`s can be joined together when the underlying tokens
 /// allow for joining subsequent operations.
 #[derive(Debug, Clone, PartialEq)]
 pub enum RawOperation<T>
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -12,7 +12,7 @@ use wasm_bindgen::prelude::*;

 pub mod token;

-/// A trait for tokenizers that take a string and return a list of tokens.
+/// Type alias for tokenizer functions that split a string into tokens.
 pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;

 #[cfg_attr(feature = "wasm", wasm_bindgen)]
--- a/src/tokenizer/token.rs
+++ b/src/tokenizer/token.rs
@ -3,13 +3,11 @@ use std::fmt::Debug;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};

-/// A token is a string that has been normalized in some way.
+/// A token with a normalized form (used for diffing) and an original form
+/// (used when applying operations). Joinability flags control whether
+/// adjacent insertions interleave or group.
 ///
-/// A token consists of the normalized form is used for comparison, and the
-/// original form used for subsequently applying `Operation`-s to a text
-/// document.
-///
-/// It's UTF-8 compatible.
+/// UTF-8 compatible.
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 pub struct Token<T>
--- a/src/types/history.rs
+++ b/src/types/history.rs
@ -15,8 +15,7 @@ pub enum History {
    RemovedFromRight = "RemovedFromRight",
 }

-/// Simple enum for describing the result of `reconcile` in a flat list.
-/// When compiled to WASM, the enum values are the same as their names.
+/// Provenance label for each span returned by `apply_with_history`.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[cfg(not(feature = "wasm"))]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
--- a/src/types/number_or_text.rs
+++ b/src/types/number_or_text.rs
@ -27,6 +27,10 @@ impl TryFrom<JsValue> for NumberOrText {
        }

        if let Some(num) = value.clone().as_f64() {
+            if num.is_nan() {
+                return Err(DeserialisationError::new("NaN is not a valid number"));
+            }
+
            if num.abs() > INTEGRAL_LIMIT {
                return Err(DeserialisationError::new(
                    "Floating-point number exceeds safe integer limit, use BigInt instead",
--- a/src/types/span_with_history.rs
+++ b/src/types/span_with_history.rs
@ -5,8 +5,7 @@ use wasm_bindgen::prelude::*;

 use crate::types::history::History;

-/// Wrapper type for `(String, History)` where History describes the origin of
-/// `text`.
+/// A text span annotated with its origin in a merge result.
 #[allow(clippy::unsafe_derive_deserialize)]
 #[cfg_attr(feature = "wasm", wasm_bindgen)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
--- a/src/types/text_with_cursors.rs
+++ b/src/types/text_with_cursors.rs
@ -12,12 +12,15 @@ pub struct TextWithCursors {

 #[cfg_attr(feature = "wasm", wasm_bindgen)]
 impl TextWithCursors {
+    /// # Panics
+    ///
+    /// Panics if any cursor's `char_index` exceeds the text's character length.
    #[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
    #[must_use]
    pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
        let length = text.chars().count();
        for cursor in &cursors {
-            debug_assert!(
+            assert!(
                cursor.char_index <= length,
                // cursor.char_index == length means that the cursor is at the end
                "Cursor positions ({}) must be contained within the text (of length {length}) or \
--- a/src/utils/string_builder.rs
+++ b/src/utils/string_builder.rs
@ -1,9 +1,8 @@
 use std::{fmt, iter::Iterator};

-/// A helper for building a string in-order based on an original string and a
-/// series of insertions, deletions, and copies applied to it. It is safe to use
-/// with UTF-8 strings as all operations are based on character indices. The
-/// methods must be called in-order.
+/// A helper for building a string sequentially from an original string via
+/// insertions, deletions, and copies. All operations use character counts,
+/// so it is safe for UTF-8. Methods must be called in-order.
 pub struct StringBuilder<'a> {
    original: Box<dyn Iterator<Item = char> + 'a>,
    buffer: String,
--- a/src/wasm.rs
+++ b/src/wasm.rs
@ -22,7 +22,7 @@ pub fn reconcile(
    crate::reconcile(parent, left, right, &*tokenizer).apply()
 }

-/// WASM wrapper around `crate::reconcile` for merging text.
+/// WASM wrapper around `crate::reconcile` that also returns provenance history.
 #[wasm_bindgen(js_name = reconcileWithHistory)]
 #[must_use]
 pub fn reconcile_with_history(
@ -94,12 +94,12 @@ pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer
        .collect()
 }

-/// Inverse of `diff`, applies a compact diff representation to a parent text
+/// Inverse of `diff`, applies a compact diff representation to a parent text.
 ///
-/// # Panics
+/// # Errors
 ///
-/// Panics if the diff format is invalid or there's an integer overflow when
-/// applying the diff.
+/// Throws a JS error if the diff format is invalid or references ranges
+/// exceeding the original text length.
 #[wasm_bindgen(js_name = undiff)]
 #[must_use]
 pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {