Improve compact diff API (#24)

* Remove is_binary from API

* Format

* Rename file

* Test with more feature combinations

* Don't depend on serde for wasm

* Fix lint & tests

* Don't unwrap to MAX number

* Expose undiff to JS

* Add undiff tests

* Lint

* Change name
This commit is contained in:
Andras Schmelczer 2025-11-16 15:43:19 +00:00 committed by GitHub
parent 6191d1adb3
commit e85eb485e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 430 additions and 424 deletions

View file

@ -1231,13 +1231,13 @@
"license": "MIT"
},
"node_modules/@types/node": {
"version": "24.0.10",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz",
"integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==",
"version": "24.10.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~7.8.0"
"undici-types": "~7.16.0"
}
},
"node_modules/@types/stack-utils": {
@ -5274,9 +5274,9 @@
}
},
"node_modules/undici-types": {
"version": "7.8.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",
"integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==",
"version": "7.16.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
"dev": true,
"license": "MIT"
},

View file

@ -1,4 +1,9 @@
import { reconcile, reconcileWithHistory } from './index';
import { reconcile, reconcileWithHistory, diff, undiff } from './index';
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
describe('reconcile', () => {
it('call reconcile without cursors', () => {
@ -44,3 +49,35 @@ describe('reconcile', () => {
expect(result.history.length).toBeGreaterThan(0);
});
});
describe('test_diff_and_undiff_are_inverse', () => {
  // Directory holding the text fixtures, resolved relative to this test file.
  const resourcesPath = path.join(__dirname, '../../tests/resources');

  /**
   * Reads a fixture and returns the code points in `[start, end)`.
   * The text is split by code point rather than UTF-16 unit so that
   * surrogate pairs are never cut in half.
   */
  function readFileSlice(fileName: string, start: number, end: number): string {
    const text = fs.readFileSync(path.join(resourcesPath, fileName), 'utf-8');
    const codePoints = Array.from(text);
    const upperBound = Math.min(end, codePoints.length);
    return codePoints.slice(start, upperBound).join('');
  }

  const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
  const ranges = [{ start: 0, end: 50000 }];

  // Register one test per (file, file, range, range) combination.
  for (const leftFile of files) {
    for (const rightFile of files) {
      for (const leftRange of ranges) {
        for (const rightRange of ranges) {
          it(`should diff & undiff ${leftFile}[${leftRange.start}..${leftRange.end}], ${rightFile}[${rightRange.start}..${rightRange.end}] without panic`, () => {
            const before = readFileSlice(leftFile, leftRange.start, leftRange.end);
            const after = readFileSlice(rightFile, rightRange.start, rightRange.end);

            // Applying the diff of (before -> after) to `before` must yield `after`.
            const roundTripped = undiff(before, diff(before, after));

            expect(roundTripped).toEqual(after);
          });
        }
      }
    }
  }
});

View file

@ -4,8 +4,8 @@ import {
TextWithCursors as wasmTextWithCursors,
SpanWithHistory as wasmSpanWithHistory,
reconcileWithHistory as wasmReconcileWithHistory,
isBinary as wasmIsBinary,
getCompactDiff as wasmGetCompactDiff,
diff as wasmDiff,
undiff as wasmUndiff,
initSync,
} from 'reconcile-text';
@ -183,22 +183,22 @@ export function reconcile(
/**
* Generates a compact diff representation between an original and changed text.
*
* These can be parsed and unpacked using Rust crate's EditedText::from_change_set.
* These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
* Cursor positions are omitted from the diff result.
*
* This function computes the differences between two versions of text and returns
* a compact string representation of those changes. The returned format is
* serialised JSON.
* a compact representation of those changes.
*
* @param original - The original/base version of the text
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
* @returns A compact string representation of the diff between original and changed text
* @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
*/
export function getCompactDiff(
export function diff(
original: string,
changed: string | TextWithOptionalCursors,
tokenizer: BuiltinTokenizer = 'Word'
): string {
): Array<number | string> {
init();
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
@ -207,13 +207,38 @@ export function getCompactDiff(
const changedWasm = toWasmTextWithCursors(changed);
const result = wasmGetCompactDiff(original, changedWasm, tokenizer);
const result = wasmDiff(original, changedWasm, tokenizer);
changedWasm.free();
return result;
}
/**
 * Rebuilds the changed text from an original text and a compact diff.
 *
 * This is the counterpart of `diff`: given the same original and the array
 * that `diff` produced, it returns the modified text.
 *
 * @param original - The original/base version of the text
 * @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
 * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
 * @returns The reconstructed changed text as a string.
 * @throws Error if `tokenizer` is not one of the built-in tokenizers.
 */
export function undiff(
  original: string,
  diff: Array<number | string>,
  tokenizer: BuiltinTokenizer = 'Word'
): string {
  init();

  const isSupportedTokenizer = BUILTIN_TOKENIZERS.includes(tokenizer);
  if (isSupportedTokenizer) {
    return wasmUndiff(original, diff, tokenizer);
  }

  throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
}
/**
* Merges three versions of text and returns detailed provenance information.
*
@ -272,19 +297,6 @@ export function reconcileWithHistory(
};
}
/**
 * Heuristically decides whether the given bytes are binary or text content.
 *
 * Only text inputs can be reconciled using the library's functions.
 *
 * @param data - The bytes to inspect, as a Uint8Array.
 * @returns True if the data is likely binary, false if it is likely text.
 */
export function isBinary(data: Uint8Array): boolean {
  init();
  const looksBinary = wasmIsBinary(data);
  return looksBinary;
}
function init() {
if (isInitialised) {
return;

View file

@ -9,6 +9,5 @@
"declarationDir": "./dist/types",
"skipLibCheck": true,
"inlineSourceMap": true
},
"exclude": ["./dist", "**/*.test.ts"]
}
}