Improve compact diff API (#24)

* Remove is_binary from API
* Format
* Rename file
* Test with more feature combinations
* Don't depend on serde for wasm
* Fix lint & tests
* Don't unwrap to MAX number
* Expose undiff to JS
* Add undiff tests
* Lint
* Change name
2025-11-16 15:43:19 +00:00 · 2025-11-16 15:43:19 +00:00 · e85eb485e8
commit e85eb485e8
parent 6191d1adb3
20 changed files with 430 additions and 424 deletions
--- a/reconcile-js/package-lock.json
+++ b/reconcile-js/package-lock.json
@ -1231,13 +1231,13 @@
      "license": "MIT"
    },
    "node_modules/@types/node": {
-      "version": "24.0.10",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz",
-      "integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==",
+      "version": "24.10.1",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
+      "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "undici-types": "~7.8.0"
+        "undici-types": "~7.16.0"
      }
    },
    "node_modules/@types/stack-utils": {
@ -5274,9 +5274,9 @@
      }
    },
    "node_modules/undici-types": {
-      "version": "7.8.0",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",
-      "integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==",
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
      "dev": true,
      "license": "MIT"
    },
--- a/reconcile-js/src/index.test.ts
+++ b/reconcile-js/src/index.test.ts
@ -1,4 +1,9 @@
-import { reconcile, reconcileWithHistory } from './index';
+import { reconcile, reconcileWithHistory, diff, undiff } from './index';
+import * as fs from 'fs';
+import * as path from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));

 describe('reconcile', () => {
  it('call reconcile without cursors', () => {
@ -44,3 +49,35 @@ describe('reconcile', () => {
    expect(result.history.length).toBeGreaterThan(0);
  });
 });
+
+describe('test_diff_and_undiff_are_inverse', () => {
+  const resourcesPath = path.join(__dirname, '../../tests/resources');
+
+  const readFileSlice = (fileName: string, start: number, end: number): string => {
+    const filePath = path.join(resourcesPath, fileName);
+    const content = fs.readFileSync(filePath, 'utf-8');
+    const chars = Array.from(content); // Handle unicode properly
+    return chars.slice(start, Math.min(end, chars.length)).join('');
+  };
+
+  const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
+
+  const ranges = [{ start: 0, end: 50000 }];
+
+  files.forEach((file1) => {
+    files.forEach((file2) => {
+      ranges.forEach((range1) => {
+        ranges.forEach((range2) => {
+          it(`should diff & undiff ${file1}[${range1.start}..${range1.end}], ${file2}[${range2.start}..${range2.end}] without panic`, () => {
+            const content1 = readFileSlice(file1, range1.start, range1.end);
+            const content2 = readFileSlice(file2, range2.start, range2.end);
+
+            const changes = diff(content1, content2);
+            const actual = undiff(content1, changes);
+            expect(actual).toEqual(content2);
+          });
+        });
+      });
+    });
+  });
+});
--- a/reconcile-js/src/index.ts
+++ b/reconcile-js/src/index.ts
@ -4,8 +4,8 @@ import {
  TextWithCursors as wasmTextWithCursors,
  SpanWithHistory as wasmSpanWithHistory,
  reconcileWithHistory as wasmReconcileWithHistory,
-  isBinary as wasmIsBinary,
-  getCompactDiff as wasmGetCompactDiff,
+  diff as wasmDiff,
+  undiff as wasmUndiff,
  initSync,
 } from 'reconcile-text';

@ -183,22 +183,22 @@ export function reconcile(
 /**
 * Generates a compact diff representation between an original and changed text.
 *
- * These can be parsed and unpacked using Rust crate's EditedText::from_change_set.
+ * These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
+ * Cursor positions are omitted from the diff result.
 *
 * This function computes the differences between two versions of text and returns
- * a compact string representation of those changes. The returned format is
- * serialised JSON.
+ * a compact representation of those changes.
 *
 * @param original - The original/base version of the text
 * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
 * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
- * @returns A compact string representation of the diff between original and changed text
+ * @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
 */
-export function getCompactDiff(
+export function diff(
  original: string,
  changed: string | TextWithOptionalCursors,
  tokenizer: BuiltinTokenizer = 'Word'
-): string {
+): Array<number | string> {
  init();

  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
@ -207,13 +207,38 @@ export function getCompactDiff(

  const changedWasm = toWasmTextWithCursors(changed);

-  const result = wasmGetCompactDiff(original, changedWasm, tokenizer);
+  const result = wasmDiff(original, changedWasm, tokenizer);

  changedWasm.free();

  return result;
 }

+/**
+ * Applies a compact diff to an original text to reconstruct the changed version.
+ *
+ * This function takes an original text and a compact diff representation (as produced
+ * by the `diff` function) and reconstructs the modified text.
+ *
+ * @param original - The original/base version of the text
+ * @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
+ * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
+ * @returns The reconstructed changed text as a string.
+ */
+export function undiff(
+  original: string,
+  diff: Array<number | string>,
+  tokenizer: BuiltinTokenizer = 'Word'
+): string {
+  init();
+
+  if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
+    throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
+  }
+
+  return wasmUndiff(original, diff, tokenizer);
+}
+
 /**
 * Merges three versions of text and returns detailed provenance information.
 *
@ -272,19 +297,6 @@ export function reconcileWithHistory(
  };
 }

-/**
- * Check (using heuristics) if the given data is binary or text content.
- *
- * Only text inputs can be reconciled using the library's functions.
- *
- * @param data - The data to check for binary content. This should be a Uint8Array.
- * @returns True if the data is likely binary, false if it is likely text.
- */
-export function isBinary(data: Uint8Array): boolean {
-  init();
-  return wasmIsBinary(data);
-}
-
 function init() {
  if (isInitialised) {
    return;
--- a/reconcile-js/tsconfig.json
+++ b/reconcile-js/tsconfig.json
@ -9,6 +9,5 @@
    "declarationDir": "./dist/types",
    "skipLibCheck": true,
    "inlineSourceMap": true
-  },
-  "exclude": ["./dist", "**/*.test.ts"]
+  }
 }