Improve compact diff API (#24)

* Remove is_binary from API * Format * Rename file * Test with more feature combinations * Don't depend on serde for wasm * Fix lint & tests * Don't unwrap to MAX number * Expose undiff to JS * Add undiff tests * Lint * Change name
2025-11-16 15:43:19 +00:00 · 2025-11-16 15:43:19 +00:00 · e85eb485e8
commit e85eb485e8
parent 6191d1adb3
20 changed files with 430 additions and 424 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -124,12 +124,6 @@ version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"

-[[package]]
-name = "memchr"
-version = "2.7.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
-
 [[package]]
 name = "memory_units"
 version = "0.4.0"
@ -188,7 +182,6 @@ dependencies = [
 "insta",
 "pretty_assertions",
 "serde",
- "serde_json",
 "serde_yaml",
 "test-case",
 "wasm-bindgen",
@ -247,19 +240,6 @@ dependencies = [
 "syn",
 ]

-[[package]]
-name = "serde_json"
-version = "1.0.145"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
-dependencies = [
- "itoa",
- "memchr",
- "ryu",
- "serde",
- "serde_core",
-]
-
 [[package]]
 name = "serde_yaml"
 version = "0.9.34+deprecated"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -24,7 +24,6 @@ path = "examples/merge-file.rs"
 serde = { version = "1.0.219", optional = true, features = ["derive"] }

 wasm-bindgen = { version = "0.2.99", optional = true }
-serde_json = { version = "1.0.145", optional = true }

 # The `console_error_panic_hook` crate provides better debugging of panics by
 # logging them with `console.error`. This is great for development, but requires
@ -37,9 +36,9 @@ wee_alloc = { version = "0.4.2", optional = true }
 [features]
 default = []
 serde = [ "dep:serde" ]
-wasm = [ "dep:wasm-bindgen", "dep:wee_alloc", "dep:serde_json", "serde" ]
+wasm = [ "dep:wasm-bindgen", "dep:wee_alloc" ]
 console_error_panic_hook = [ "dep:console_error_panic_hook" ]
-all = [ "wasm", "console_error_panic_hook" ]
+all = [ "wasm", "console_error_panic_hook", "serde" ]

 [dev-dependencies]
 insta = "1.43.2"
--- a/examples/website/src/index.html
+++ b/examples/website/src/index.html
@ -23,7 +23,12 @@
    <link rel="icon" type="image/x-icon" href="favicon.ico" />
    <title>reconcile-text: conflict-free 3-way text merging</title>
    <link inline inline-asset="index.css" inline-asset-delete />
-    <script defer data-domain="reconcile" data-api="https://stats.schmelczer.dev/status" src="https://stats.schmelczer.dev/js/script.outbound-links.js"></script>
+    <script
+      defer
+      data-domain="reconcile"
+      data-api="https://stats.schmelczer.dev/status"
+      src="https://stats.schmelczer.dev/js/script.outbound-links.js"
+    ></script>
  </head>
  <body>
    <div class="background"></div>
--- a/reconcile-js/package-lock.json
+++ b/reconcile-js/package-lock.json
@ -1231,13 +1231,13 @@
      "license": "MIT"
    },
    "node_modules/@types/node": {
-      "version": "24.0.10",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz",
-      "integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==",
+      "version": "24.10.1",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
+      "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "undici-types": "~7.8.0"
+        "undici-types": "~7.16.0"
      }
    },
    "node_modules/@types/stack-utils": {
@ -5274,9 +5274,9 @@
      }
    },
    "node_modules/undici-types": {
-      "version": "7.8.0",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",
-      "integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==",
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
      "dev": true,
      "license": "MIT"
    },
--- a/reconcile-js/src/index.test.ts
+++ b/reconcile-js/src/index.test.ts
@ -1,4 +1,9 @@
-import { reconcile, reconcileWithHistory } from './index';
+import { reconcile, reconcileWithHistory, diff, undiff } from './index';
+import * as fs from 'fs';
+import * as path from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));

 describe('reconcile', () => {
  it('call reconcile without cursors', () => {
@ -44,3 +49,35 @@ describe('reconcile', () => {
    expect(result.history.length).toBeGreaterThan(0);
  });
 });
+
+describe('test_diff_and_undiff_are_inverse', () => {
+  const resourcesPath = path.join(__dirname, '../../tests/resources');
+
+  const readFileSlice = (fileName: string, start: number, end: number): string => {
+    const filePath = path.join(resourcesPath, fileName);
+    const content = fs.readFileSync(filePath, 'utf-8');
+    const chars = Array.from(content); // Handle unicode properly
+    return chars.slice(start, Math.min(end, chars.length)).join('');
+  };
+
+  const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
+
+  const ranges = [{ start: 0, end: 50000 }];
+
+  files.forEach((file1) => {
+    files.forEach((file2) => {
+      ranges.forEach((range1) => {
+        ranges.forEach((range2) => {
+          it(`should diff & undiff ${file1}[${range1.start}..${range1.end}], ${file2}[${range2.start}..${range2.end}] without panic`, () => {
+            const content1 = readFileSlice(file1, range1.start, range1.end);
+            const content2 = readFileSlice(file2, range2.start, range2.end);
+
+            const changes = diff(content1, content2);
+            const actual = undiff(content1, changes);
+            expect(actual).toEqual(content2);
+          });
+        });
+      });
+    });
+  });
+});
--- a/reconcile-js/src/index.ts
+++ b/reconcile-js/src/index.ts
@ -4,8 +4,8 @@ import {
  TextWithCursors as wasmTextWithCursors,
  SpanWithHistory as wasmSpanWithHistory,
  reconcileWithHistory as wasmReconcileWithHistory,
-  isBinary as wasmIsBinary,
-  getCompactDiff as wasmGetCompactDiff,
+  diff as wasmDiff,
+  undiff as wasmUndiff,
  initSync,
 } from 'reconcile-text';

@ -183,22 +183,22 @@ export function reconcile(
 /**
 * Generates a compact diff representation between an original and changed text.
 *
- * These can be parsed and unpacked using Rust crate's EditedText::from_change_set.
+ * The result can be unpacked using the `undiff` function or the Rust crate's `EditedText::from_diff`.
+ * Cursor positions are omitted from the diff result.
 *
 * This function computes the differences between two versions of text and returns
- * a compact string representation of those changes. The returned format is
- * serialised JSON.
+ * a compact representation of those changes.
 *
 * @param original - The original/base version of the text
 * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
 * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
- * @returns A compact string representation of the diff between original and changed text
+ * @returns An array representing the compact diff, with inserts as strings, deletes as negative integers, and unchanged spans as positive integers.
 */
-export function getCompactDiff(
+export function diff(
  original: string,
  changed: string | TextWithOptionalCursors,
  tokenizer: BuiltinTokenizer = 'Word'
-): string {
+): Array<number | string> {
  init();

  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
@ -207,13 +207,38 @@ export function getCompactDiff(

  const changedWasm = toWasmTextWithCursors(changed);

-  const result = wasmGetCompactDiff(original, changedWasm, tokenizer);
+  const result = wasmDiff(original, changedWasm, tokenizer);

  changedWasm.free();

  return result;
 }

+/**
+ * Applies a compact diff to an original text to reconstruct the changed version.
+ *
+ * This function takes an original text and a compact diff representation (as produced
+ * by the `diff` function) and reconstructs the modified text.
+ *
+ * @param original - The original/base version of the text
+ * @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers, unchanged spans as positive integers)
+ * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
+ * @returns The reconstructed changed text as a string.
+ */
+export function undiff(
+  original: string,
+  diff: Array<number | string>,
+  tokenizer: BuiltinTokenizer = 'Word'
+): string {
+  init();
+
+  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
+    throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
+  }
+
+  return wasmUndiff(original, diff, tokenizer);
+}
+
 /**
 * Merges three versions of text and returns detailed provenance information.
 *
@ -272,19 +297,6 @@ export function reconcileWithHistory(
  };
 }

-/**
- * Check (using heuristics) if the given data is binary or text content.
- *
- * Only text inputs can be reconciled using the library's functions.
- *
- * @param data - The data to check for binary content. This should be a Uint8Array.
- * @returns True if the data is likely binary, false if it is likely text.
- */
-export function isBinary(data: Uint8Array): boolean {
-  init();
-  return wasmIsBinary(data);
-}
-
 function init() {
  if (isInitialised) {
    return;
--- a/reconcile-js/tsconfig.json
+++ b/reconcile-js/tsconfig.json
@ -9,6 +9,5 @@
    "declarationDir": "./dist/types",
    "skipLibCheck": true,
    "inlineSourceMap": true
-  },
-  "exclude": ["./dist", "**/*.test.ts"]
+  }
 }
--- a/scripts/test.sh
+++ b/scripts/test.sh
@ -4,7 +4,12 @@ set -e

 wasm-pack build --target web --features wasm
 cargo test --verbose --features serde -- --include-ignored 
-cargo test --features serde,wasm
+
+cargo test 
+cargo test --features serde
+cargo test --features wasm
+cargo test --features all
+
 wasm-pack test --node --features wasm

 cd reconcile-js
--- a/src/lib.rs
+++ b/src/lib.rs
@ -157,6 +157,8 @@
 //! original text, making the size depend only on the changes made.
 //!
 //! ```rust
+//! # #[cfg(feature = "serde")]
+//! # {
 //! use reconcile_text::{EditedText, BuiltinTokenizer};
 //! use serde_yaml;
 //! use pretty_assertions::assert_eq;
@ -170,20 +172,18 @@
 //!     &changes.into()
 //! );
 //!
-//! let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap();
+//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
 //! assert_eq!(
 //!     serialized,
 //!     concat!(
-//!         "operations:\n",
 //!         "- 15\n",
 //!         "- -6\n",
-//!         "- ' easy with reconcile!'\n",
-//!         "cursors: []\n"
+//!         "- ' easy with reconcile!'\n"
 //!     )
 //! );
 //!
 //! let deserialized = serde_yaml::from_str(&serialized).unwrap();
-//! let reconstructed = EditedText::from_change_set(
+//! let reconstructed = EditedText::from_diff(
 //!     original,
 //!     deserialized,
 //!     &*BuiltinTokenizer::Word
@ -192,13 +192,17 @@
 //!     reconstructed.apply().text(),
 //!     "Merging text is easy with reconcile!"
 //! );
+//! # }
 //! ```
 //!
 //! ## Error handling
 //!
 //! The library is designed to be robust and will always produce a result, even
-//! in edge cases. However, be aware that extremely large diffs may have
-//! performance implications.
+//! for edge cases.
+//!
+//! ## Performance
+//!
+//! Be aware that extremely large diffs may have performance implications.
 //!
 //! ## Algorithm overview
 //!
@ -211,13 +215,12 @@ mod tokenizer;
 mod types;
 mod utils;

-pub use operation_transformation::{ChangeSet, EditedText, reconcile};
+pub use operation_transformation::{EditedText, reconcile};
 pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
 pub use types::{
-    cursor_position::CursorPosition, history::History, side::Side,
-    span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
+    cursor_position::CursorPosition, history::History, number_or_string::NumberOrString,
+    side::Side, span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
 };
-pub use utils::is_binary::is_binary;

 #[cfg(feature = "wasm")]
 pub mod wasm;
--- a/src/operation_transformation.rs
+++ b/src/operation_transformation.rs
@ -1,12 +1,10 @@
 mod edited_text;
 mod operation;
-mod transport;
 mod utils;
 use std::fmt::Debug;

 pub use edited_text::EditedText;
 pub use operation::Operation;
-pub use transport::ChangeSet;

 use crate::{Tokenizer, types::text_with_cursors::TextWithCursors};

--- a/src/operation_transformation/edited_text.rs
+++ b/src/operation_transformation/edited_text.rs
@ -4,15 +4,17 @@ use std::{fmt::Debug, vec};
 use serde::{Deserialize, Serialize};

 use crate::{
-    BuiltinTokenizer, ChangeSet, CursorPosition, TextWithCursors,
+    BuiltinTokenizer, CursorPosition, TextWithCursors,
    operation_transformation::{
        Operation,
-        transport::SimpleOperation,
        utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
    },
    raw_operation::RawOperation,
    tokenizer::Tokenizer,
-    types::{history::History, side::Side, span_with_history::SpanWithHistory},
+    types::{
+        history::History, number_or_string::NumberOrString, side::Side,
+        span_with_history::SpanWithHistory,
+    },
    utils::string_builder::StringBuilder,
 };

@ -105,6 +107,11 @@ where
    /// from the same original text. The operations are merged using the
    /// principles of Operational Transformation. The cursors are updated
    /// accordingly to reflect the changes made by the merged operations.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a length does not fit in `isize` when calculating new
+    /// cursor positions.
    #[must_use]
    #[allow(clippy::too_many_lines)]
    pub fn merge(self, other: Self) -> Self {
@ -166,13 +173,14 @@ where
                    let result = operation.merge_operations(&mut last_other_op);

                    if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
-                        let merged_length_signed =
-                            isize::try_from(merged_length).unwrap_or(isize::MAX);
-                        let seen_left_length_signed =
-                            isize::try_from(seen_left_length).unwrap_or(isize::MAX);
-                        let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
-                        let original_length_signed =
-                            isize::try_from(original_length).unwrap_or(isize::MAX);
+                        let merged_length_signed = isize::try_from(merged_length)
+                            .expect("merged_length must fit in isize");
+                        let seen_left_length_signed = isize::try_from(seen_left_length)
+                            .expect("seen_left_length must fit in isize");
+                        let op_len_signed =
+                            isize::try_from(op.len()).expect("op.len() must fit in isize");
+                        let original_length_signed = isize::try_from(original_length)
+                            .expect("original_length must fit in isize");

                        let shift = merged_length_signed - seen_left_length_signed + op_len_signed
                            - original_length_signed;
@ -199,13 +207,14 @@ where
                    let result = operation.merge_operations(&mut last_other_op);

                    if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
-                        let merged_length_signed =
-                            isize::try_from(merged_length).unwrap_or(isize::MAX);
-                        let seen_right_length_signed =
-                            isize::try_from(seen_right_length).unwrap_or(isize::MAX);
-                        let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
-                        let original_length_signed =
-                            isize::try_from(original_length).unwrap_or(isize::MAX);
+                        let merged_length_signed = isize::try_from(merged_length)
+                            .expect("merged_length must fit in isize");
+                        let seen_right_length_signed = isize::try_from(seen_right_length)
+                            .expect("seen_right_length must fit in isize");
+                        let op_len_signed =
+                            isize::try_from(op.len()).expect("op.len() must fit in isize");
+                        let original_length_signed = isize::try_from(original_length)
+                            .expect("original_length must fit in isize");

                        let shift = merged_length_signed - seen_right_length_signed + op_len_signed
                            - original_length_signed;
@ -345,34 +354,122 @@ where
        history
    }

-    /// Serialize the `EditedText` as a `ChangeSet`, which contains only
-    /// the operations and cursor positions, but without the original text.
-    /// This is useful for sending changes over the network if there's
-    /// a clear consensus on the original text.
+    /// Convert the `EditedText` into a terse representation ready for
+    /// serialization. The result omits cursor positions and the original text.
+    /// This is useful for sending text diffs over the network if there's a
+    /// clear consensus on the original text.
+    ///
+    /// Inserts are represented as strings, deletes as negative integers,
+    /// and equal spans as positive integers.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an equal-span length or a deleted character count does not fit in `i64`.
    #[must_use]
-    pub fn to_change_set(&self) -> ChangeSet {
-        ChangeSet::new(
-            SimpleOperation::from_operations(&self.operations),
-            self.cursors.clone(),
-        )
+    pub fn to_diff(&self) -> Vec<NumberOrString> {
+        let mut result: Vec<NumberOrString> = Vec::with_capacity(self.operations.len());
+        let mut previous_equal: Option<usize> = None;
+
+        for operation in &self.operations {
+            match operation {
+                Operation::Equal { length, .. } => {
+                    if let Some(prev_length) = previous_equal {
+                        previous_equal = Some(prev_length + *length);
+                    } else {
+                        previous_equal = Some(*length);
+                    }
                }

-    /// Deserialize an `EditedText` from a `ChangeSet` and the original text.
-    /// This is useful for reconstructing the `EditedText` on the receiving
-    /// end after sending only the `ChangeSet` over the network.
+                Operation::Insert { text, .. } => {
+                    if let Some(prev_length) = previous_equal {
+                        result.push(NumberOrString::Number(
+                            i64::try_from(prev_length).expect("prev_length must fit in i64"),
+                        ));
+                        previous_equal = None;
+                    }
+
+                    let text: String = text
+                        .iter()
+                        .map(super::super::tokenizer::token::Token::original)
+                        .collect();
+                    result.push(NumberOrString::Text(text));
+                }
+
+                Operation::Delete {
+                    deleted_character_count,
+                    ..
+                } => {
+                    if let Some(prev_length) = previous_equal {
+                        result.push(NumberOrString::Number(
+                            i64::try_from(prev_length).expect("prev_length must fit in i64"),
+                        ));
+                        previous_equal = None;
+                    }
+
+                    let count = i64::try_from(*deleted_character_count)
+                        .expect("deleted_character_count must fit in i64");
+                    result.push(NumberOrString::Number(-count));
+                }
+            }
+        }
+
+        if let Some(prev_length) = previous_equal {
+            result.push(NumberOrString::Number(
+                i64::try_from(prev_length).expect("prev_length must fit in i64"),
+            ));
+        }
+
+        result
+    }
+
+    /// Reconstruct an `EditedText` from a diff and the original text.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a number in `diff` is `i64::MIN` or if its magnitude does
+    /// not fit in `usize`.
    #[must_use]
-    pub fn from_change_set(
-        text: &'a str,
-        change_set: ChangeSet,
+    pub fn from_diff(
+        original_text: &'a str,
+        diff: Vec<NumberOrString>,
        tokenizer: &Tokenizer<T>,
    ) -> EditedText<'a, T> {
-        let operations = SimpleOperation::to_operations(change_set.operations, text, tokenizer);
+        let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
+        let mut order = 0;
+
+        for item in diff {
+            match item {
+                NumberOrString::Number(length) => {
+                    if length >= 0 {
+                        let length = usize::try_from(length).expect("length must fit in usize");
+                        let original_characters: String =
+                            original_text.chars().skip(order).take(length).collect();
+
+                        let original_tokens = tokenizer(&original_characters);
+                        for token in original_tokens {
+                            operations
+                                .push(Operation::create_equal(order, token.get_original_length()));
+                            order += token.get_original_length();
+                        }
+                    } else {
+                        let length =
+                            usize::try_from(-length).expect("negative length must fit in usize");
+                        operations.push(Operation::create_delete(order, length));
+                        order += length;
+                    }
+                }
+                NumberOrString::Text(text) => {
+                    let tokens = tokenizer(&text);
+                    operations.push(Operation::create_insert(order, tokens));
+                }
+            }
+        }
+
        let operation_count = operations.len();
        EditedText::new(
-            text,
+            original_text,
            operations,
            vec![Side::Left; operation_count],
-            change_set.cursors,
+            vec![],
        )
    }
 }
@ -423,34 +520,29 @@ mod tests {
        assert_eq!(operations.apply().text(), expected);
    }

+    #[cfg(feature = "serde")]
    #[test]
-    fn test_change_set_deserialisation() {
+    fn test_changes_deserialisation() {
        let original = "Merging text is hard!";
        let changes = "Merging text is easy with reconcile!";
        let result = EditedText::from_strings(original, &changes.into());
-        let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap();
-
-        let expected = concat!(
-            "operations:\n",
-            "- 15\n",
-            "- -6\n",
-            "- ' easy with reconcile!'\n",
-            "cursors: []\n"
-        );
+        let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();

+        let expected = concat!("- 15\n", "- -6\n", "- ' easy with reconcile!'\n",);
        assert_eq!(serialized, expected);
    }

+    #[cfg(feature = "serde")]
    #[test]
-    fn test_change_set_serialization() {
+    fn test_changes_serialization() {
        let original = "The quick brown fox jumps over the lazy dog.";
        let updated = "The quick red fox jumped over the very lazy dog!";

        let edited_text = EditedText::from_strings(original, &updated.into());

-        let change_set = edited_text.to_change_set();
+        let changes = edited_text.to_diff();
        let deserialized_edited_text =
-            EditedText::from_change_set(original, change_set, &*BuiltinTokenizer::Word);
+            EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word);

        assert_eq!(deserialized_edited_text.apply().text(), updated);
    }
--- a/src/operation_transformation/transport.rs
+++ b/src/operation_transformation/transport.rs
@ -1,204 +0,0 @@
-use std::fmt::Debug;
-
-#[cfg(feature = "serde")]
-use serde::{
-    Deserialize, Serialize,
-    de::{self, Deserializer, Visitor},
-    ser::Serializer,
-};
-
-use crate::{CursorPosition, Tokenizer, operation_transformation::Operation};
-
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub enum SimpleOperation {
-    Equal { length: usize },
-    Insert { text: String },
-    Delete { length: usize },
-}
-
-impl SimpleOperation {
-    pub fn from_operations<T>(operation: &Vec<Operation<T>>) -> Vec<Self>
-    where
-        T: PartialEq + Clone + Debug,
-    {
-        let mut result: Vec<Self> = Vec::with_capacity(operation.len());
-        let mut previous_equal: Option<usize> = None;
-
-        for operation in operation {
-            match operation {
-                Operation::Equal { length, .. } => {
-                    if let Some(prev_length) = previous_equal {
-                        previous_equal = Some(prev_length + *length);
-                    } else {
-                        previous_equal = Some(*length);
-                    }
-                }
-
-                Operation::Insert { text, .. } => {
-                    if let Some(prev_length) = previous_equal {
-                        result.push(SimpleOperation::Equal {
-                            length: prev_length,
-                        });
-                        previous_equal = None;
-                    }
-
-                    let text: String = text
-                        .iter()
-                        .map(super::super::tokenizer::token::Token::original)
-                        .collect();
-                    result.push(SimpleOperation::Insert { text });
-                }
-
-                Operation::Delete {
-                    deleted_character_count,
-                    ..
-                } => {
-                    if let Some(prev_length) = previous_equal {
-                        result.push(SimpleOperation::Equal {
-                            length: prev_length,
-                        });
-                        previous_equal = None;
-                    }
-
-                    result.push(SimpleOperation::Delete {
-                        length: *deleted_character_count,
-                    });
-                }
-            }
-        }
-
-        if let Some(prev_length) = previous_equal {
-            result.push(SimpleOperation::Equal {
-                length: prev_length,
-            });
-        }
-
-        result
-    }
-
-    // This is similar to `crate::operation_transformation::utils::cook_operations`
-    pub fn to_operations<T>(
-        simple_operations: Vec<Self>,
-        original_text: &str,
-        tokenizer: &Tokenizer<T>,
-    ) -> Vec<Operation<T>>
-    where
-        T: PartialEq + Clone + Debug,
-    {
-        let mut operations: Vec<Operation<T>> = Vec::with_capacity(simple_operations.len());
-        let mut order = 0;
-
-        for simple_operation in simple_operations {
-            match simple_operation {
-                SimpleOperation::Equal { length } => {
-                    let original_characters: String =
-                        original_text.chars().skip(order).take(length).collect();
-
-                    let original_tokens = tokenizer(&original_characters);
-                    for token in original_tokens {
-                        operations
-                            .push(Operation::create_equal(order, token.get_original_length()));
-                        order += token.get_original_length();
-                    }
-                }
-
-                SimpleOperation::Insert { text } => {
-                    let tokens = tokenizer(&text);
-                    operations.push(Operation::create_insert(order, tokens));
-                }
-
-                SimpleOperation::Delete { length } => {
-                    operations.push(Operation::create_delete(order, length));
-                    order += length;
-                }
-            }
-        }
-
-        operations
-    }
-}
-
-#[cfg(feature = "serde")]
-impl Serialize for SimpleOperation {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        // neat idea from https://github.com/spebern/operational-transform-rs/blob/9faa17f0a2b282ac2e09dbb2d29fdaf2ae0bbb4a/operational-transform/src/serde.rs#L14
-        match self {
-            SimpleOperation::Equal { length } => serializer.serialize_u64(*length as u64),
-            SimpleOperation::Insert { text } => serializer.serialize_str(text),
-            SimpleOperation::Delete { length } => {
-                serializer.serialize_i64(-(i64::try_from(*length).unwrap_or(i64::MAX)))
-            }
-        }
-    }
-}
-
-#[cfg(feature = "serde")]
-impl<'de> Deserialize<'de> for SimpleOperation {
-    fn deserialize<D>(deserializer: D) -> Result<SimpleOperation, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        use std::fmt;
-
-        struct OperationVisitor;
-
-        impl Visitor<'_> for OperationVisitor {
-            type Value = SimpleOperation;
-
-            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
-                formatter.write_str("an integer between -2^63 and 2^64-1 or a string")
-            }
-
-            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
-            where
-                E: de::Error,
-            {
-                Ok(SimpleOperation::Equal {
-                    length: usize::try_from(value).unwrap_or(usize::MAX),
-                })
-            }
-
-            fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
-            where
-                E: de::Error,
-            {
-                Ok(SimpleOperation::Delete {
-                    length: usize::try_from(-value).unwrap_or(usize::MAX),
-                })
-            }
-
-            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
-            where
-                E: de::Error,
-            {
-                Ok(SimpleOperation::Insert {
-                    text: value.to_owned(),
-                })
-            }
-        }
-
-        deserializer.deserialize_any(OperationVisitor)
-    }
-}
-
-/// A serializable representation of the changes made to a text document
-/// without the original text.
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[derive(Debug, Clone, PartialEq, Default)]
-pub struct ChangeSet {
-    pub operations: Vec<SimpleOperation>,
-    pub cursors: Vec<CursorPosition>,
-}
-
-impl ChangeSet {
-    #[must_use]
-    pub fn new(operations: Vec<SimpleOperation>, cursors: Vec<CursorPosition>) -> Self {
-        Self {
-            operations,
-            cursors,
-        }
-    }
-}
--- a/src/types.rs
+++ b/src/types.rs
@ -1,5 +1,6 @@
 pub mod cursor_position;
 pub mod history;
+pub mod number_or_string;
 pub mod side;
 pub mod span_with_history;
 pub mod text_with_cursors;
--- a/src/types/number_or_string.rs
+++ b/src/types/number_or_string.rs
@ -0,0 +1,74 @@
+use std::fmt::Debug;
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+#[cfg(feature = "wasm")]
+use wasm_bindgen::prelude::*;
+
+/// A value that is either a signed 64-bit integer or an owned string.
+///
+/// With the `serde` feature enabled the enum is (de)serialised untagged, i.e.
+/// as a bare number or a bare string instead of a tagged variant.
+///
+/// NOTE(review): presumably the element type of the compact diff list —
+/// confirm against `EditedText::to_diff`.
+#[cfg_attr(
+    feature = "serde",
+    derive(Serialize, Deserialize),
+    serde(untagged)
+)]
+#[derive(Debug, Clone, PartialEq)]
+pub enum NumberOrString {
+    /// The integer alternative.
+    Number(i64),
+    /// The textual alternative.
+    Text(String),
+}
+
+#[cfg(feature = "wasm")]
+impl TryFrom<JsValue> for NumberOrString {
+    type Error = DeserialisationError;
+
+    /// Interprets a JS value as a number first and as a string second.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`DeserialisationError`] when the value converts to neither
+    /// an `i64` nor a `String`.
+    ///
+    /// NOTE(review): which JS values convert to `i64` is decided by
+    /// wasm-bindgen's `TryFrom<JsValue> for i64` — confirm whether plain JS
+    /// numbers are accepted or only `BigInt`s.
+    fn try_from(value: JsValue) -> Result<Self, Self::Error> {
+        // The numeric attempt works on a clone so that the original value is
+        // still available for the string attempt.
+        i64::try_from(value.clone())
+            .map(Self::Number)
+            .or_else(|_| String::try_from(value).map(Self::Text))
+            .map_err(|_| {
+                DeserialisationError::new("Could not parse JsValue as either number or string")
+            })
+    }
+}
+
+#[cfg(feature = "wasm")]
+impl From<NumberOrString> for JsValue {
+    /// Converts whichever alternative is held into a JS value, delegating to
+    /// the `From<String>` / `From<i64>` conversions of `JsValue`.
+    fn from(value: NumberOrString) -> Self {
+        match value {
+            NumberOrString::Text(text) => Self::from(text),
+            NumberOrString::Number(number) => Self::from(number),
+        }
+    }
+}
+
+/// Error type for deserialisation failures.
+///
+/// Carries a human-readable description of why a value could not be
+/// converted. Only compiled when the `wasm` feature is enabled.
+#[cfg(feature = "wasm")]
+#[derive(Debug, Clone)]
+pub struct DeserialisationError {
+    /// Human-readable description of the failure.
+    pub message: String,
+}
+
+#[cfg(feature = "wasm")]
+impl DeserialisationError {
+    /// Builds an error from anything that converts into a `String`.
+    pub fn new(message: impl Into<String>) -> Self {
+        let message = message.into();
+        Self { message }
+    }
+}
+
+#[cfg(feature = "wasm")]
+impl std::fmt::Display for DeserialisationError {
+    /// Writes the message prefixed with `Deserialisation error: `.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("Deserialisation error: ")?;
+        f.write_str(&self.message)
+    }
+}
+
+// Marker impl: the default `std::error::Error` methods are sufficient, the
+// description comes from the `Display` impl.
+#[cfg(feature = "wasm")]
+impl std::error::Error for DeserialisationError {}
+
+#[cfg(feature = "wasm")]
+impl From<DeserialisationError> for JsValue {
+    /// Converts the error into a JS value built from its `message` only; the
+    /// prefix added by the `Display` impl is not included.
+    fn from(error: DeserialisationError) -> Self {
+        let DeserialisationError { message } = error;
+        Self::from_str(&message)
+    }
+}
--- a/src/utils.rs
+++ b/src/utils.rs
@ -1,6 +1,5 @@
 pub mod common_prefix_len;
 pub mod common_suffix_len;
 pub mod find_longest_prefix_contained_within;
-pub mod is_binary;
 pub mod myers_diff;
 pub mod string_builder;
--- a/src/utils/is_binary.rs
+++ b/src/utils/is_binary.rs
@ -1,26 +0,0 @@
-/// Heuristically determine if the given data is a binary or a text file's
-/// content.
-///
-/// Only text inputs can be reconciled using the crate's functions.
-#[must_use]
-pub fn is_binary(data: &[u8]) -> bool {
-    if data.contains(&0) {
-        // Even though the NUL character is valid in UTF-8, it's highly suspicious in
-        // human-readable text.
-        return true;
-    }
-
-    std::str::from_utf8(data).is_err()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_is_binary() {
-        assert!(is_binary(&[0, 159, 146, 150]));
-        assert!(is_binary(&[0, 12]));
-        assert!(!is_binary(b"hello"));
-    }
-}
--- a/src/utils/myers_diff.rs
+++ b/src/utils/myers_diff.rs
@ -87,7 +87,7 @@ struct V {
 impl V {
    fn new(max_d: usize) -> Self {
        // max_d should fit in isize for the algorithm to work correctly
-        let offset = isize::try_from(max_d).unwrap_or(isize::MAX);
+        let offset = isize::try_from(max_d).expect("max_d must fit in isize");
        Self {
            offset,
            v: vec![0; 2 * max_d],
@ -101,16 +101,15 @@ impl Index<isize> for V {
    type Output = usize;

    fn index(&self, index: isize) -> &Self::Output {
-        let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
-        &self.v[idx.min(self.v.len().saturating_sub(1))]
+        let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize");
+        &self.v[idx]
    }
 }

 impl IndexMut<isize> for V {
    fn index_mut(&mut self, index: isize) -> &mut Self::Output {
-        let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
-        let len = self.v.len();
-        &mut self.v[idx.min(len.saturating_sub(1))]
+        let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize");
+        &mut self.v[idx]
    }
 }

@ -145,7 +144,8 @@ where

    // By Lemma 1 in the paper, the optimal edit script length is odd or even as
    // `delta` is odd or even.
-    let delta = isize::try_from(n).unwrap_or(isize::MAX) - isize::try_from(m).unwrap_or(isize::MAX);
+    let delta = isize::try_from(n).expect("n must fit in isize")
+        - isize::try_from(m).expect("m must fit in isize");
    let odd = delta & 1 == 1;

    // The initial point at (0, -1)
@ -157,7 +157,7 @@ where
    assert!(vf.len() >= d_max);
    assert!(vb.len() >= d_max);

-    let d_max_isize = isize::try_from(d_max).unwrap_or(isize::MAX);
+    let d_max_isize = isize::try_from(d_max).expect("d_max must fit in isize");
    for d in 0..d_max_isize {
        // Forward path
        for k in (-d..=d).rev().step_by(2) {
@ -166,7 +166,8 @@ where
            } else {
                vf[k - 1] + 1
            };
-            let y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
+            let y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k)
+                .expect("x - k must be non-negative and fit in usize");

            // The coordinate of the start of a snake
            let (x0, y0) = (x, y);
@ -204,7 +205,8 @@ where
            } else {
                vb[k - 1] + 1
            };
-            let mut y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
+            let mut y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k)
+                .expect("x - k must be non-negative and fit in usize");

            // The coordinate of the start of a snake
            if x < n && y < m {
--- a/src/wasm.rs
+++ b/src/wasm.rs
@ -3,7 +3,7 @@ use core::str;

 use wasm_bindgen::prelude::*;

-use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors};
+use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};

 #[global_allocator]
 static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
@ -32,6 +32,7 @@ pub fn reconcile_with_history(
    tokenizer: BuiltinTokenizer,
 ) -> TextWithCursorsAndHistory {
    set_panic_hook();
+
    let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
    let text_with_cursors = reconciled.apply();

@ -54,10 +55,6 @@ pub fn reconcile_with_history(
 /// # Returns
 ///
 /// The merged document.
-///
-/// # Panics
-///
-/// If any of the input documents are not valid UTF-8 strings.
 #[wasm_bindgen(js_name = genericReconcile)]
 #[must_use]
 pub fn generic_reconcile(
@ -68,51 +65,56 @@ pub fn generic_reconcile(
 ) -> Vec<u8> {
    set_panic_hook();

-    if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
-        right.to_vec()
+    if let (Some(parent), Some(left), Some(right)) = (
+        string_or_nothing(parent),
+        string_or_nothing(left),
+        string_or_nothing(right),
+    ) {
+        crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer)
+            .apply()
+            .text()
+            .into_bytes()
    } else {
-        crate::reconcile(
-            str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
-            &str::from_utf8(left)
-                .expect("left must be valid UTF-8 because it's not binary")
-                .into(),
-            &str::from_utf8(right)
-                .expect("right must be valid UTF-8 because it's not binary")
-                .into(),
+        right.to_vec()
+    }
+}
+
+/// WASM entry point that computes the compact diff between `parent` and
+/// `changed`.
+///
+/// The two texts are turned into an `EditedText` using `tokenizer`; each
+/// element of its diff is then converted into a `JsValue`, so JS callers
+/// receive a list of numbers and strings.
+#[wasm_bindgen(js_name = diff)]
+#[must_use]
+pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> {
+    set_panic_hook();
+
+    let elements = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer).to_diff();
+
+    let mut js_values = Vec::new();
+    for element in elements {
+        js_values.push(element.into());
+    }
+    js_values
+}
+
+/// Inverse of `diff`, applies a compact diff representation to a parent text
+///
+/// # Panics
+///
+/// Panics if the diff format is invalid or there's an integer overflow when
+/// applying the diff.
+#[wasm_bindgen(js_name = undiff)]
+#[must_use]
+pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {
+    set_panic_hook();
+
+    EditedText::from_diff(
+        parent,
+        diff.into_iter()
+            .map(std::convert::TryInto::try_into)
+            .collect::<Result<_, _>>()
+            .expect("Invalid diff format"),
        &*tokenizer,
    )
    .apply()
    .text()
-        .into_bytes()
-    }
-}
-
-/// WASM wrapper around getting a compact diff representation as a JSON string
-///
-/// # Panics
-///
-/// If serialization to JSON fails which should not happen
-#[wasm_bindgen(js_name = getCompactDiff)]
-#[must_use]
-pub fn get_compact_diff(
-    parent: &str,
-    changed: &TextWithCursors,
-    tokenizer: BuiltinTokenizer,
-) -> String {
-    set_panic_hook();
-    let edited_text = crate::EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
-    let change_set = edited_text.to_change_set();
-
-    serde_json::to_string(&change_set).expect("Failed to serialize change set")
-}
-
-/// Heuristically determine if the given data is a binary or a text file's
-/// content.
-#[wasm_bindgen(js_name = isBinary)]
-#[must_use]
-pub fn is_binary(data: &[u8]) -> bool {
-    set_panic_hook();
-    crate::is_binary(data)
 }

 fn set_panic_hook() {
@ -140,3 +142,30 @@ impl TextWithCursorsAndHistory {
    #[must_use]
    pub fn history(&self) -> Vec<SpanWithHistory> { self.history.clone() }
 }
+
+/// Decodes `data` as UTF-8 text, or returns `None` when it looks like binary
+/// content (it contains a NUL byte or is not valid UTF-8).
+#[must_use]
+fn string_or_nothing(data: &[u8]) -> Option<String> {
+    // NUL is a valid UTF-8 code point, but it is highly suspicious in
+    // human-readable text, so its presence is treated as a sign of binary data.
+    let has_nul = data.iter().any(|&byte| byte == 0);
+    if has_nul {
+        return None;
+    }
+
+    match std::str::from_utf8(data) {
+        Ok(text) => Some(text.to_owned()),
+        Err(_) => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Inputs containing a NUL byte are rejected; plain ASCII is returned as
+    /// an owned string.
+    #[test]
+    fn test_string_or_nothing() {
+        let nul_with_invalid_utf8: &[u8] = &[0, 159, 146, 150];
+        let nul_with_ascii: &[u8] = &[0, 12];
+
+        assert_eq!(string_or_nothing(nul_with_invalid_utf8), None);
+        assert_eq!(string_or_nothing(nul_with_ascii), None);
+        assert_eq!(string_or_nothing(b"hello"), Some(String::from("hello")));
+    }
+}
--- a/tests/test.rs
+++ b/tests/test.rs
@ -3,7 +3,7 @@ mod example_document;
 use std::{fs, path::Path};

 use example_document::ExampleDocument;
-use reconcile_text::{BuiltinTokenizer, EditedText, reconcile};
+use reconcile_text::{BuiltinTokenizer, reconcile};
 use serde::Deserialize;

 #[test]
@ -34,8 +34,11 @@ fn test_document_one_way_with_cursors() {
    }
 }

+#[cfg(feature = "serde")]
 #[test]
-fn test_document_one_way_with_cursors_and_serialisation() {
+fn test_document_one_way_with_serialisation() {
+    use reconcile_text::EditedText;
+
    for doc in &get_all_documents() {
        let parent = doc.parent();
        let left_operations =
@ -47,19 +50,23 @@ fn test_document_one_way_with_cursors_and_serialisation() {
        );

        let serialised_left =
-            serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_change_set()).unwrap())
+            serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap())
                .unwrap();
-        let serialised_right = serde_yaml::from_str(
-            &serde_yaml::to_string(&right_operations.to_change_set()).unwrap(),
-        )
+        let serialised_right =
+            serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap())
                .unwrap();

        let restored_left_operations =
-            EditedText::from_change_set(&parent, serialised_left, &*BuiltinTokenizer::Word);
+            EditedText::from_diff(&parent, serialised_left, &*BuiltinTokenizer::Word);
        let restored_right_operations =
-            EditedText::from_change_set(&parent, serialised_right, &*BuiltinTokenizer::Word);
+            EditedText::from_diff(&parent, serialised_right, &*BuiltinTokenizer::Word);

-        doc.assert_eq(&restored_left_operations.merge(restored_right_operations));
+        doc.assert_eq_without_cursors(
+            &restored_left_operations
+                .merge(restored_right_operations)
+                .apply()
+                .text(),
+        );
    }
 }

--- a/tests/wasm.rs
+++ b/tests/wasm.rs
@ -55,22 +55,16 @@ fn test_merge_binary() {
    );
 }

-#[wasm_bindgen_test(unsupported = test)]
-fn test_is_binary() {
-    assert!(is_binary(&[0, 159, 146, 150]));
-    assert!(is_binary(&[0, 12]));
-    assert!(!is_binary(b"hello"));
-}
-
-#[wasm_bindgen_test(unsupported = test)]
-fn test_get_compact_diff() {
+#[wasm_bindgen_test] // JsValue isn't supported outside of wasm
+fn test_diff() {
    let parent = "hello ";
    let changed = "world";
-    let result = get_compact_diff(parent, &changed.into(), BuiltinTokenizer::Word);
-    assert_eq!(result, "{\"operations\":[-6,\"world\"],\"cursors\":[]}");
-}

-#[wasm_bindgen_test(unsupported = test)]
-fn test_is_binary_empty() {
-    assert!(!is_binary(b""));
+    let result = diff(parent, &changed.into(), BuiltinTokenizer::Word);
+
+    assert_eq!(result.len(), 2);
+    let first: i64 = result[0].clone().try_into().unwrap();
+    let second: String = result[1].clone().try_into().unwrap();
+    assert_eq!(first, -6);
+    assert_eq!(second, "world");
 }