Improve compact diff API (#24)
* Remove is_binary from API
* Format
* Rename file
* Test with more feature combinations
* Don't depend on serde for wasm
* Fix lint & tests
* Don't unwrap to MAX number
* Expose undiff to JS
* Add undiff tests
* Lint
* Change name
This commit is contained in:
parent
6191d1adb3
commit
e85eb485e8
20 changed files with 430 additions and 424 deletions
14
reconcile-js/package-lock.json
generated
14
reconcile-js/package-lock.json
generated
|
|
@ -1231,13 +1231,13 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "24.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz",
|
||||
"integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==",
|
||||
"version": "24.10.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
|
||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.8.0"
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stack-utils": {
|
||||
|
|
@ -5274,9 +5274,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.8.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",
|
||||
"integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==",
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,4 +1,9 @@
|
|||
import { reconcile, reconcileWithHistory } from './index';
|
||||
import { reconcile, reconcileWithHistory, diff, undiff } from './index';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
describe('reconcile', () => {
|
||||
it('call reconcile without cursors', () => {
|
||||
|
|
@ -44,3 +49,35 @@ describe('reconcile', () => {
|
|||
expect(result.history.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('test_diff_and_undiff_are_inverse', () => {
|
||||
const resourcesPath = path.join(__dirname, '../../tests/resources');
|
||||
|
||||
const readFileSlice = (fileName: string, start: number, end: number): string => {
|
||||
const filePath = path.join(resourcesPath, fileName);
|
||||
const content = fs.readFileSync(filePath, 'utf-8');
|
||||
const chars = Array.from(content); // Handle unicode properly
|
||||
return chars.slice(start, Math.min(end, chars.length)).join('');
|
||||
};
|
||||
|
||||
const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
|
||||
|
||||
const ranges = [{ start: 0, end: 50000 }];
|
||||
|
||||
files.forEach((file1) => {
|
||||
files.forEach((file2) => {
|
||||
ranges.forEach((range1) => {
|
||||
ranges.forEach((range2) => {
|
||||
it(`should diff & undiff ${file1}[${range1.start}..${range1.end}], ${file2}[${range2.start}..${range2.end}] without panic`, () => {
|
||||
const content1 = readFileSlice(file1, range1.start, range1.end);
|
||||
const content2 = readFileSlice(file2, range2.start, range2.end);
|
||||
|
||||
const changes = diff(content1, content2);
|
||||
const actual = undiff(content1, changes);
|
||||
expect(actual).toEqual(content2);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ import {
|
|||
TextWithCursors as wasmTextWithCursors,
|
||||
SpanWithHistory as wasmSpanWithHistory,
|
||||
reconcileWithHistory as wasmReconcileWithHistory,
|
||||
isBinary as wasmIsBinary,
|
||||
getCompactDiff as wasmGetCompactDiff,
|
||||
diff as wasmDiff,
|
||||
undiff as wasmUndiff,
|
||||
initSync,
|
||||
} from 'reconcile-text';
|
||||
|
||||
|
|
@ -183,22 +183,22 @@ export function reconcile(
|
|||
/**
|
||||
* Generates a compact diff representation between an original and changed text.
|
||||
*
|
||||
* These can be parsed and unpacked using Rust crate's EditedText::from_change_set.
|
||||
* These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
|
||||
* Cursor positions are omitted from the diff result.
|
||||
*
|
||||
* This function computes the differences between two versions of text and returns
|
||||
* a compact string representation of those changes. The returned format is
|
||||
* serialised JSON.
|
||||
* a compact representation of those changes.
|
||||
*
|
||||
* @param original - The original/base version of the text
|
||||
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
|
||||
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||
* @returns A compact string representation of the diff between original and changed text
|
||||
* @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
|
||||
*/
|
||||
export function getCompactDiff(
|
||||
export function diff(
|
||||
original: string,
|
||||
changed: string | TextWithOptionalCursors,
|
||||
tokenizer: BuiltinTokenizer = 'Word'
|
||||
): string {
|
||||
): Array<number | string> {
|
||||
init();
|
||||
|
||||
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||
|
|
@ -207,13 +207,38 @@ export function getCompactDiff(
|
|||
|
||||
const changedWasm = toWasmTextWithCursors(changed);
|
||||
|
||||
const result = wasmGetCompactDiff(original, changedWasm, tokenizer);
|
||||
const result = wasmDiff(original, changedWasm, tokenizer);
|
||||
|
||||
changedWasm.free();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a compact diff to an original text to reconstruct the changed version.
|
||||
*
|
||||
* This function takes an original text and a compact diff representation (as produced
|
||||
* by the `diff` function) and reconstructs the modified text.
|
||||
*
|
||||
* @param original - The original/base version of the text
|
||||
* @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
|
||||
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||
* @returns The reconstructed changed text as a string.
|
||||
*/
|
||||
export function undiff(
|
||||
original: string,
|
||||
diff: Array<number | string>,
|
||||
tokenizer: BuiltinTokenizer = 'Word'
|
||||
): string {
|
||||
init();
|
||||
|
||||
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
|
||||
}
|
||||
|
||||
return wasmUndiff(original, diff, tokenizer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges three versions of text and returns detailed provenance information.
|
||||
*
|
||||
|
|
@ -272,19 +297,6 @@ export function reconcileWithHistory(
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check (using heuristics) if the given data is binary or text content.
|
||||
*
|
||||
* Only text inputs can be reconciled using the library's functions.
|
||||
*
|
||||
* @param data - The data to check for binary content. This should be a Uint8Array.
|
||||
* @returns True if the data is likely binary, false if it is likely text.
|
||||
*/
|
||||
export function isBinary(data: Uint8Array): boolean {
|
||||
init();
|
||||
return wasmIsBinary(data);
|
||||
}
|
||||
|
||||
function init() {
|
||||
if (isInitialised) {
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,5 @@
|
|||
"declarationDir": "./dist/types",
|
||||
"skipLibCheck": true,
|
||||
"inlineSourceMap": true
|
||||
},
|
||||
"exclude": ["./dist", "**/*.test.ts"]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue