Improve compact diff API (#24)
* Remove is_binary from API
* Format
* Rename file
* Test with more feature combinations
* Don't depend on serde for wasm
* Fix lint & tests
* Don't unwrap to MAX number
* Expose undiff to JS
* Add undiff tests
* Lint
* Change name
This commit is contained in:
parent
6191d1adb3
commit
e85eb485e8
20 changed files with 430 additions and 424 deletions
20
Cargo.lock
generated
20
Cargo.lock
generated
|
|
@ -124,12 +124,6 @@ version = "0.4.27"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "memchr"
|
|
||||||
version = "2.7.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memory_units"
|
name = "memory_units"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
|
|
@ -188,7 +182,6 @@ dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
|
||||||
"serde_yaml",
|
"serde_yaml",
|
||||||
"test-case",
|
"test-case",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
|
|
@ -247,19 +240,6 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "serde_json"
|
|
||||||
version = "1.0.145"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
|
|
||||||
dependencies = [
|
|
||||||
"itoa",
|
|
||||||
"memchr",
|
|
||||||
"ryu",
|
|
||||||
"serde",
|
|
||||||
"serde_core",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_yaml"
|
name = "serde_yaml"
|
||||||
version = "0.9.34+deprecated"
|
version = "0.9.34+deprecated"
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ path = "examples/merge-file.rs"
|
||||||
serde = { version = "1.0.219", optional = true, features = ["derive"] }
|
serde = { version = "1.0.219", optional = true, features = ["derive"] }
|
||||||
|
|
||||||
wasm-bindgen = { version = "0.2.99", optional = true }
|
wasm-bindgen = { version = "0.2.99", optional = true }
|
||||||
serde_json = { version = "1.0.145", optional = true }
|
|
||||||
|
|
||||||
# The `console_error_panic_hook` crate provides better debugging of panics by
|
# The `console_error_panic_hook` crate provides better debugging of panics by
|
||||||
# logging them with `console.error`. This is great for development, but requires
|
# logging them with `console.error`. This is great for development, but requires
|
||||||
|
|
@ -37,9 +36,9 @@ wee_alloc = { version = "0.4.2", optional = true }
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
serde = [ "dep:serde" ]
|
serde = [ "dep:serde" ]
|
||||||
wasm = [ "dep:wasm-bindgen", "dep:wee_alloc", "dep:serde_json", "serde" ]
|
wasm = [ "dep:wasm-bindgen", "dep:wee_alloc" ]
|
||||||
console_error_panic_hook = [ "dep:console_error_panic_hook" ]
|
console_error_panic_hook = [ "dep:console_error_panic_hook" ]
|
||||||
all = [ "wasm", "console_error_panic_hook" ]
|
all = [ "wasm", "console_error_panic_hook", "serde" ]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
insta = "1.43.2"
|
insta = "1.43.2"
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,12 @@
|
||||||
<link rel="icon" type="image/x-icon" href="favicon.ico" />
|
<link rel="icon" type="image/x-icon" href="favicon.ico" />
|
||||||
<title>reconcile-text: conflict-free 3-way text merging</title>
|
<title>reconcile-text: conflict-free 3-way text merging</title>
|
||||||
<link inline inline-asset="index.css" inline-asset-delete />
|
<link inline inline-asset="index.css" inline-asset-delete />
|
||||||
<script defer data-domain="reconcile" data-api="https://stats.schmelczer.dev/status" src="https://stats.schmelczer.dev/js/script.outbound-links.js"></script>
|
<script
|
||||||
|
defer
|
||||||
|
data-domain="reconcile"
|
||||||
|
data-api="https://stats.schmelczer.dev/status"
|
||||||
|
src="https://stats.schmelczer.dev/js/script.outbound-links.js"
|
||||||
|
></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="background"></div>
|
<div class="background"></div>
|
||||||
|
|
|
||||||
14
reconcile-js/package-lock.json
generated
14
reconcile-js/package-lock.json
generated
|
|
@ -1231,13 +1231,13 @@
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/@types/node": {
|
"node_modules/@types/node": {
|
||||||
"version": "24.0.10",
|
"version": "24.10.1",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
|
||||||
"integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==",
|
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~7.8.0"
|
"undici-types": "~7.16.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/stack-utils": {
|
"node_modules/@types/stack-utils": {
|
||||||
|
|
@ -5274,9 +5274,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "7.8.0",
|
"version": "7.16.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||||
"integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==",
|
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,9 @@
|
||||||
import { reconcile, reconcileWithHistory } from './index';
|
import { reconcile, reconcileWithHistory, diff, undiff } from './index';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
|
||||||
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||||
|
|
||||||
describe('reconcile', () => {
|
describe('reconcile', () => {
|
||||||
it('call reconcile without cursors', () => {
|
it('call reconcile without cursors', () => {
|
||||||
|
|
@ -44,3 +49,35 @@ describe('reconcile', () => {
|
||||||
expect(result.history.length).toBeGreaterThan(0);
|
expect(result.history.length).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('test_diff_and_undiff_are_inverse', () => {
|
||||||
|
const resourcesPath = path.join(__dirname, '../../tests/resources');
|
||||||
|
|
||||||
|
const readFileSlice = (fileName: string, start: number, end: number): string => {
|
||||||
|
const filePath = path.join(resourcesPath, fileName);
|
||||||
|
const content = fs.readFileSync(filePath, 'utf-8');
|
||||||
|
const chars = Array.from(content); // Handle unicode properly
|
||||||
|
return chars.slice(start, Math.min(end, chars.length)).join('');
|
||||||
|
};
|
||||||
|
|
||||||
|
const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
|
||||||
|
|
||||||
|
const ranges = [{ start: 0, end: 50000 }];
|
||||||
|
|
||||||
|
files.forEach((file1) => {
|
||||||
|
files.forEach((file2) => {
|
||||||
|
ranges.forEach((range1) => {
|
||||||
|
ranges.forEach((range2) => {
|
||||||
|
it(`should diff & undiff ${file1}[${range1.start}..${range1.end}], ${file2}[${range2.start}..${range2.end}] without panic`, () => {
|
||||||
|
const content1 = readFileSlice(file1, range1.start, range1.end);
|
||||||
|
const content2 = readFileSlice(file2, range2.start, range2.end);
|
||||||
|
|
||||||
|
const changes = diff(content1, content2);
|
||||||
|
const actual = undiff(content1, changes);
|
||||||
|
expect(actual).toEqual(content2);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,8 @@ import {
|
||||||
TextWithCursors as wasmTextWithCursors,
|
TextWithCursors as wasmTextWithCursors,
|
||||||
SpanWithHistory as wasmSpanWithHistory,
|
SpanWithHistory as wasmSpanWithHistory,
|
||||||
reconcileWithHistory as wasmReconcileWithHistory,
|
reconcileWithHistory as wasmReconcileWithHistory,
|
||||||
isBinary as wasmIsBinary,
|
diff as wasmDiff,
|
||||||
getCompactDiff as wasmGetCompactDiff,
|
undiff as wasmUndiff,
|
||||||
initSync,
|
initSync,
|
||||||
} from 'reconcile-text';
|
} from 'reconcile-text';
|
||||||
|
|
||||||
|
|
@ -183,22 +183,22 @@ export function reconcile(
|
||||||
/**
|
/**
|
||||||
* Generates a compact diff representation between an original and changed text.
|
* Generates a compact diff representation between an original and changed text.
|
||||||
*
|
*
|
||||||
* These can be parsed and unpacked using Rust crate's EditedText::from_change_set.
|
* These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
|
||||||
|
* Cursor positions are omitted from the diff result.
|
||||||
*
|
*
|
||||||
* This function computes the differences between two versions of text and returns
|
* This function computes the differences between two versions of text and returns
|
||||||
* a compact string representation of those changes. The returned format is
|
* a compact representation of those changes.
|
||||||
* serialised JSON.
|
|
||||||
*
|
*
|
||||||
* @param original - The original/base version of the text
|
* @param original - The original/base version of the text
|
||||||
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
|
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
|
||||||
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||||
* @returns A compact string representation of the diff between original and changed text
|
* @returns An array representing the compact diff, with inserts as strings and deletes as negative integers.
|
||||||
*/
|
*/
|
||||||
export function getCompactDiff(
|
export function diff(
|
||||||
original: string,
|
original: string,
|
||||||
changed: string | TextWithOptionalCursors,
|
changed: string | TextWithOptionalCursors,
|
||||||
tokenizer: BuiltinTokenizer = 'Word'
|
tokenizer: BuiltinTokenizer = 'Word'
|
||||||
): string {
|
): Array<number | string> {
|
||||||
init();
|
init();
|
||||||
|
|
||||||
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||||
|
|
@ -207,13 +207,38 @@ export function getCompactDiff(
|
||||||
|
|
||||||
const changedWasm = toWasmTextWithCursors(changed);
|
const changedWasm = toWasmTextWithCursors(changed);
|
||||||
|
|
||||||
const result = wasmGetCompactDiff(original, changedWasm, tokenizer);
|
const result = wasmDiff(original, changedWasm, tokenizer);
|
||||||
|
|
||||||
changedWasm.free();
|
changedWasm.free();
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Applies a compact diff to an original text to reconstruct the changed version.
|
||||||
|
*
|
||||||
|
* This function takes an original text and a compact diff representation (as produced
|
||||||
|
* by the `diff` function) and reconstructs the modified text.
|
||||||
|
*
|
||||||
|
* @param original - The original/base version of the text
|
||||||
|
* @param diff - The compact diff array representing changes (inserts as strings, deletes as negative integers)
|
||||||
|
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||||
|
* @returns The reconstructed changed text as a string.
|
||||||
|
*/
|
||||||
|
export function undiff(
|
||||||
|
original: string,
|
||||||
|
diff: Array<number | string>,
|
||||||
|
tokenizer: BuiltinTokenizer = 'Word'
|
||||||
|
): string {
|
||||||
|
init();
|
||||||
|
|
||||||
|
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||||
|
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
return wasmUndiff(original, diff, tokenizer);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges three versions of text and returns detailed provenance information.
|
* Merges three versions of text and returns detailed provenance information.
|
||||||
*
|
*
|
||||||
|
|
@ -272,19 +297,6 @@ export function reconcileWithHistory(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Check (using heuristics) if the given data is binary or text content.
|
|
||||||
*
|
|
||||||
* Only text inputs can be reconciled using the library's functions.
|
|
||||||
*
|
|
||||||
* @param data - The data to check for binary content. This should be a Uint8Array.
|
|
||||||
* @returns True if the data is likely binary, false if it is likely text.
|
|
||||||
*/
|
|
||||||
export function isBinary(data: Uint8Array): boolean {
|
|
||||||
init();
|
|
||||||
return wasmIsBinary(data);
|
|
||||||
}
|
|
||||||
|
|
||||||
function init() {
|
function init() {
|
||||||
if (isInitialised) {
|
if (isInitialised) {
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,5 @@
|
||||||
"declarationDir": "./dist/types",
|
"declarationDir": "./dist/types",
|
||||||
"skipLibCheck": true,
|
"skipLibCheck": true,
|
||||||
"inlineSourceMap": true
|
"inlineSourceMap": true
|
||||||
},
|
}
|
||||||
"exclude": ["./dist", "**/*.test.ts"]
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,12 @@ set -e
|
||||||
|
|
||||||
wasm-pack build --target web --features wasm
|
wasm-pack build --target web --features wasm
|
||||||
cargo test --verbose --features serde -- --include-ignored
|
cargo test --verbose --features serde -- --include-ignored
|
||||||
cargo test --features serde,wasm
|
|
||||||
|
cargo test
|
||||||
|
cargo test --features serde
|
||||||
|
cargo test --features wasm
|
||||||
|
cargo test --features all
|
||||||
|
|
||||||
wasm-pack test --node --features wasm
|
wasm-pack test --node --features wasm
|
||||||
|
|
||||||
cd reconcile-js
|
cd reconcile-js
|
||||||
|
|
|
||||||
25
src/lib.rs
25
src/lib.rs
|
|
@ -157,6 +157,8 @@
|
||||||
//! original text, making the size depend only on the changes made.
|
//! original text, making the size depend only on the changes made.
|
||||||
//!
|
//!
|
||||||
//! ```rust
|
//! ```rust
|
||||||
|
//! # #[cfg(feature = "serde")]
|
||||||
|
//! # {
|
||||||
//! use reconcile_text::{EditedText, BuiltinTokenizer};
|
//! use reconcile_text::{EditedText, BuiltinTokenizer};
|
||||||
//! use serde_yaml;
|
//! use serde_yaml;
|
||||||
//! use pretty_assertions::assert_eq;
|
//! use pretty_assertions::assert_eq;
|
||||||
|
|
@ -170,20 +172,18 @@
|
||||||
//! &changes.into()
|
//! &changes.into()
|
||||||
//! );
|
//! );
|
||||||
//!
|
//!
|
||||||
//! let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap();
|
//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
|
||||||
//! assert_eq!(
|
//! assert_eq!(
|
||||||
//! serialized,
|
//! serialized,
|
||||||
//! concat!(
|
//! concat!(
|
||||||
//! "operations:\n",
|
|
||||||
//! "- 15\n",
|
//! "- 15\n",
|
||||||
//! "- -6\n",
|
//! "- -6\n",
|
||||||
//! "- ' easy with reconcile!'\n",
|
//! "- ' easy with reconcile!'\n"
|
||||||
//! "cursors: []\n"
|
|
||||||
//! )
|
//! )
|
||||||
//! );
|
//! );
|
||||||
//!
|
//!
|
||||||
//! let deserialized = serde_yaml::from_str(&serialized).unwrap();
|
//! let deserialized = serde_yaml::from_str(&serialized).unwrap();
|
||||||
//! let reconstructed = EditedText::from_change_set(
|
//! let reconstructed = EditedText::from_diff(
|
||||||
//! original,
|
//! original,
|
||||||
//! deserialized,
|
//! deserialized,
|
||||||
//! &*BuiltinTokenizer::Word
|
//! &*BuiltinTokenizer::Word
|
||||||
|
|
@ -192,13 +192,17 @@
|
||||||
//! reconstructed.apply().text(),
|
//! reconstructed.apply().text(),
|
||||||
//! "Merging text is easy with reconcile!"
|
//! "Merging text is easy with reconcile!"
|
||||||
//! );
|
//! );
|
||||||
|
//! # }
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! ## Error handling
|
//! ## Error handling
|
||||||
//!
|
//!
|
||||||
//! The library is designed to be robust and will always produce a result, even
|
//! The library is designed to be robust and will always produce a result, even
|
||||||
//! in edge cases. However, be aware that extremely large diffs may have
|
//! for edge cases.
|
||||||
//! performance implications.
|
//!
|
||||||
|
//! ## Performance
|
||||||
|
//!
|
||||||
|
//! Be aware that extremely large diffs may have performance implications.
|
||||||
//!
|
//!
|
||||||
//! ## Algorithm overview
|
//! ## Algorithm overview
|
||||||
//!
|
//!
|
||||||
|
|
@ -211,13 +215,12 @@ mod tokenizer;
|
||||||
mod types;
|
mod types;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
|
||||||
pub use operation_transformation::{ChangeSet, EditedText, reconcile};
|
pub use operation_transformation::{EditedText, reconcile};
|
||||||
pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
|
pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
|
||||||
pub use types::{
|
pub use types::{
|
||||||
cursor_position::CursorPosition, history::History, side::Side,
|
cursor_position::CursorPosition, history::History, number_or_string::NumberOrString,
|
||||||
span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
|
side::Side, span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
|
||||||
};
|
};
|
||||||
pub use utils::is_binary::is_binary;
|
|
||||||
|
|
||||||
#[cfg(feature = "wasm")]
|
#[cfg(feature = "wasm")]
|
||||||
pub mod wasm;
|
pub mod wasm;
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,10 @@
|
||||||
mod edited_text;
|
mod edited_text;
|
||||||
mod operation;
|
mod operation;
|
||||||
mod transport;
|
|
||||||
mod utils;
|
mod utils;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
pub use edited_text::EditedText;
|
pub use edited_text::EditedText;
|
||||||
pub use operation::Operation;
|
pub use operation::Operation;
|
||||||
pub use transport::ChangeSet;
|
|
||||||
|
|
||||||
use crate::{Tokenizer, types::text_with_cursors::TextWithCursors};
|
use crate::{Tokenizer, types::text_with_cursors::TextWithCursors};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,15 +4,17 @@ use std::{fmt::Debug, vec};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
BuiltinTokenizer, ChangeSet, CursorPosition, TextWithCursors,
|
BuiltinTokenizer, CursorPosition, TextWithCursors,
|
||||||
operation_transformation::{
|
operation_transformation::{
|
||||||
Operation,
|
Operation,
|
||||||
transport::SimpleOperation,
|
|
||||||
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
||||||
},
|
},
|
||||||
raw_operation::RawOperation,
|
raw_operation::RawOperation,
|
||||||
tokenizer::Tokenizer,
|
tokenizer::Tokenizer,
|
||||||
types::{history::History, side::Side, span_with_history::SpanWithHistory},
|
types::{
|
||||||
|
history::History, number_or_string::NumberOrString, side::Side,
|
||||||
|
span_with_history::SpanWithHistory,
|
||||||
|
},
|
||||||
utils::string_builder::StringBuilder,
|
utils::string_builder::StringBuilder,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -105,6 +107,11 @@ where
|
||||||
/// from the same original text. The operations are merged using the
|
/// from the same original text. The operations are merged using the
|
||||||
/// principles of Operational Transformation. The cursors are updated
|
/// principles of Operational Transformation. The cursors are updated
|
||||||
/// accordingly to reflect the changes made by the merged operations.
|
/// accordingly to reflect the changes made by the merged operations.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if there's an integer overflow (in isize) when calculating new
|
||||||
|
/// cursor positions.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[allow(clippy::too_many_lines)]
|
#[allow(clippy::too_many_lines)]
|
||||||
pub fn merge(self, other: Self) -> Self {
|
pub fn merge(self, other: Self) -> Self {
|
||||||
|
|
@ -166,13 +173,14 @@ where
|
||||||
let result = operation.merge_operations(&mut last_other_op);
|
let result = operation.merge_operations(&mut last_other_op);
|
||||||
|
|
||||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||||
let merged_length_signed =
|
let merged_length_signed = isize::try_from(merged_length)
|
||||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
.expect("merged_length must fit in isize");
|
||||||
let seen_left_length_signed =
|
let seen_left_length_signed = isize::try_from(seen_left_length)
|
||||||
isize::try_from(seen_left_length).unwrap_or(isize::MAX);
|
.expect("seen_left_length must fit in isize");
|
||||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
let op_len_signed =
|
||||||
let original_length_signed =
|
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
let original_length_signed = isize::try_from(original_length)
|
||||||
|
.expect("original_length must fit in isize");
|
||||||
|
|
||||||
let shift = merged_length_signed - seen_left_length_signed + op_len_signed
|
let shift = merged_length_signed - seen_left_length_signed + op_len_signed
|
||||||
- original_length_signed;
|
- original_length_signed;
|
||||||
|
|
@ -199,13 +207,14 @@ where
|
||||||
let result = operation.merge_operations(&mut last_other_op);
|
let result = operation.merge_operations(&mut last_other_op);
|
||||||
|
|
||||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||||
let merged_length_signed =
|
let merged_length_signed = isize::try_from(merged_length)
|
||||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
.expect("merged_length must fit in isize");
|
||||||
let seen_right_length_signed =
|
let seen_right_length_signed = isize::try_from(seen_right_length)
|
||||||
isize::try_from(seen_right_length).unwrap_or(isize::MAX);
|
.expect("seen_right_length must fit in isize");
|
||||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
let op_len_signed =
|
||||||
let original_length_signed =
|
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
let original_length_signed = isize::try_from(original_length)
|
||||||
|
.expect("original_length must fit in isize");
|
||||||
|
|
||||||
let shift = merged_length_signed - seen_right_length_signed + op_len_signed
|
let shift = merged_length_signed - seen_right_length_signed + op_len_signed
|
||||||
- original_length_signed;
|
- original_length_signed;
|
||||||
|
|
@ -345,34 +354,122 @@ where
|
||||||
history
|
history
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Serialize the `EditedText` as a `ChangeSet`, which contains only
|
/// Convert the `EditedText` into a terse representation ready for
|
||||||
/// the operations and cursor positions, but without the original text.
|
/// serialization. The result omits cursor positions and the original text.
|
||||||
/// This is useful for sending changes over the network if there's
|
/// This is useful for sending text diffs over the network if there's a
|
||||||
/// a clear consensus on the original text.
|
/// clear consensus on the original text.
|
||||||
|
///
|
||||||
|
/// Inserts are represented as strings, deletes as negative integers,
|
||||||
|
/// and equal spans as positive integers.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if there's an integer overflow in i64.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn to_change_set(&self) -> ChangeSet {
|
pub fn to_diff(&self) -> Vec<NumberOrString> {
|
||||||
ChangeSet::new(
|
let mut result: Vec<NumberOrString> = Vec::with_capacity(self.operations.len());
|
||||||
SimpleOperation::from_operations(&self.operations),
|
let mut previous_equal: Option<usize> = None;
|
||||||
self.cursors.clone(),
|
|
||||||
)
|
for operation in &self.operations {
|
||||||
|
match operation {
|
||||||
|
Operation::Equal { length, .. } => {
|
||||||
|
if let Some(prev_length) = previous_equal {
|
||||||
|
previous_equal = Some(prev_length + *length);
|
||||||
|
} else {
|
||||||
|
previous_equal = Some(*length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deserialize an `EditedText` from a `ChangeSet` and the original text.
|
Operation::Insert { text, .. } => {
|
||||||
/// This is useful for reconstructing the `EditedText` on the receiving
|
if let Some(prev_length) = previous_equal {
|
||||||
/// end after sending only the `ChangeSet` over the network.
|
result.push(NumberOrString::Number(
|
||||||
|
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||||
|
));
|
||||||
|
previous_equal = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let text: String = text
|
||||||
|
.iter()
|
||||||
|
.map(super::super::tokenizer::token::Token::original)
|
||||||
|
.collect();
|
||||||
|
result.push(NumberOrString::Text(text));
|
||||||
|
}
|
||||||
|
|
||||||
|
Operation::Delete {
|
||||||
|
deleted_character_count,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
if let Some(prev_length) = previous_equal {
|
||||||
|
result.push(NumberOrString::Number(
|
||||||
|
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||||
|
));
|
||||||
|
previous_equal = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let count = i64::try_from(*deleted_character_count)
|
||||||
|
.expect("deleted_character_count must fit in i64");
|
||||||
|
result.push(NumberOrString::Number(-count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(prev_length) = previous_equal {
|
||||||
|
result.push(NumberOrString::Number(
|
||||||
|
i64::try_from(prev_length).expect("prev_length must fit in i64"),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deserialize an `EditedText` from a change list and the original text.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if a length in the diff cannot be represented as `usize`.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn from_change_set(
|
pub fn from_diff(
|
||||||
text: &'a str,
|
original_text: &'a str,
|
||||||
change_set: ChangeSet,
|
diff: Vec<NumberOrString>,
|
||||||
tokenizer: &Tokenizer<T>,
|
tokenizer: &Tokenizer<T>,
|
||||||
) -> EditedText<'a, T> {
|
) -> EditedText<'a, T> {
|
||||||
let operations = SimpleOperation::to_operations(change_set.operations, text, tokenizer);
|
let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
|
||||||
|
let mut order = 0;
|
||||||
|
|
||||||
|
for item in diff {
|
||||||
|
match item {
|
||||||
|
NumberOrString::Number(length) => {
|
||||||
|
if length >= 0 {
|
||||||
|
let length = usize::try_from(length).expect("length must fit in usize");
|
||||||
|
let original_characters: String =
|
||||||
|
original_text.chars().skip(order).take(length).collect();
|
||||||
|
|
||||||
|
let original_tokens = tokenizer(&original_characters);
|
||||||
|
for token in original_tokens {
|
||||||
|
operations
|
||||||
|
.push(Operation::create_equal(order, token.get_original_length()));
|
||||||
|
order += token.get_original_length();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let length =
|
||||||
|
usize::try_from(-length).expect("negative length must fit in usize");
|
||||||
|
operations.push(Operation::create_delete(order, length));
|
||||||
|
order += length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NumberOrString::Text(text) => {
|
||||||
|
let tokens = tokenizer(&text);
|
||||||
|
operations.push(Operation::create_insert(order, tokens));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let operation_count = operations.len();
|
let operation_count = operations.len();
|
||||||
EditedText::new(
|
EditedText::new(
|
||||||
text,
|
original_text,
|
||||||
operations,
|
operations,
|
||||||
vec![Side::Left; operation_count],
|
vec![Side::Left; operation_count],
|
||||||
change_set.cursors,
|
vec![],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -423,34 +520,29 @@ mod tests {
|
||||||
assert_eq!(operations.apply().text(), expected);
|
assert_eq!(operations.apply().text(), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "serde")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_change_set_deserialisation() {
|
fn test_changes_deserialisation() {
|
||||||
let original = "Merging text is hard!";
|
let original = "Merging text is hard!";
|
||||||
let changes = "Merging text is easy with reconcile!";
|
let changes = "Merging text is easy with reconcile!";
|
||||||
let result = EditedText::from_strings(original, &changes.into());
|
let result = EditedText::from_strings(original, &changes.into());
|
||||||
let serialized = serde_yaml::to_string(&result.to_change_set()).unwrap();
|
let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
|
||||||
|
|
||||||
let expected = concat!(
|
|
||||||
"operations:\n",
|
|
||||||
"- 15\n",
|
|
||||||
"- -6\n",
|
|
||||||
"- ' easy with reconcile!'\n",
|
|
||||||
"cursors: []\n"
|
|
||||||
);
|
|
||||||
|
|
||||||
|
let expected = concat!("- 15\n", "- -6\n", "- ' easy with reconcile!'\n",);
|
||||||
assert_eq!(serialized, expected);
|
assert_eq!(serialized, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "serde")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_change_set_serialization() {
|
fn test_changes_serialization() {
|
||||||
let original = "The quick brown fox jumps over the lazy dog.";
|
let original = "The quick brown fox jumps over the lazy dog.";
|
||||||
let updated = "The quick red fox jumped over the very lazy dog!";
|
let updated = "The quick red fox jumped over the very lazy dog!";
|
||||||
|
|
||||||
let edited_text = EditedText::from_strings(original, &updated.into());
|
let edited_text = EditedText::from_strings(original, &updated.into());
|
||||||
|
|
||||||
let change_set = edited_text.to_change_set();
|
let changes = edited_text.to_diff();
|
||||||
let deserialized_edited_text =
|
let deserialized_edited_text =
|
||||||
EditedText::from_change_set(original, change_set, &*BuiltinTokenizer::Word);
|
EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word);
|
||||||
|
|
||||||
assert_eq!(deserialized_edited_text.apply().text(), updated);
|
assert_eq!(deserialized_edited_text.apply().text(), updated);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,204 +0,0 @@
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
#[cfg(feature = "serde")]
|
|
||||||
use serde::{
|
|
||||||
Deserialize, Serialize,
|
|
||||||
de::{self, Deserializer, Visitor},
|
|
||||||
ser::Serializer,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{CursorPosition, Tokenizer, operation_transformation::Operation};
|
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
|
||||||
pub enum SimpleOperation {
|
|
||||||
Equal { length: usize },
|
|
||||||
Insert { text: String },
|
|
||||||
Delete { length: usize },
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SimpleOperation {
|
|
||||||
pub fn from_operations<T>(operation: &Vec<Operation<T>>) -> Vec<Self>
|
|
||||||
where
|
|
||||||
T: PartialEq + Clone + Debug,
|
|
||||||
{
|
|
||||||
let mut result: Vec<Self> = Vec::with_capacity(operation.len());
|
|
||||||
let mut previous_equal: Option<usize> = None;
|
|
||||||
|
|
||||||
for operation in operation {
|
|
||||||
match operation {
|
|
||||||
Operation::Equal { length, .. } => {
|
|
||||||
if let Some(prev_length) = previous_equal {
|
|
||||||
previous_equal = Some(prev_length + *length);
|
|
||||||
} else {
|
|
||||||
previous_equal = Some(*length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Operation::Insert { text, .. } => {
|
|
||||||
if let Some(prev_length) = previous_equal {
|
|
||||||
result.push(SimpleOperation::Equal {
|
|
||||||
length: prev_length,
|
|
||||||
});
|
|
||||||
previous_equal = None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let text: String = text
|
|
||||||
.iter()
|
|
||||||
.map(super::super::tokenizer::token::Token::original)
|
|
||||||
.collect();
|
|
||||||
result.push(SimpleOperation::Insert { text });
|
|
||||||
}
|
|
||||||
|
|
||||||
Operation::Delete {
|
|
||||||
deleted_character_count,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
if let Some(prev_length) = previous_equal {
|
|
||||||
result.push(SimpleOperation::Equal {
|
|
||||||
length: prev_length,
|
|
||||||
});
|
|
||||||
previous_equal = None;
|
|
||||||
}
|
|
||||||
|
|
||||||
result.push(SimpleOperation::Delete {
|
|
||||||
length: *deleted_character_count,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(prev_length) = previous_equal {
|
|
||||||
result.push(SimpleOperation::Equal {
|
|
||||||
length: prev_length,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is similar to `crate::operation_transformation::utils::cook_operations`
|
|
||||||
pub fn to_operations<T>(
|
|
||||||
simple_operations: Vec<Self>,
|
|
||||||
original_text: &str,
|
|
||||||
tokenizer: &Tokenizer<T>,
|
|
||||||
) -> Vec<Operation<T>>
|
|
||||||
where
|
|
||||||
T: PartialEq + Clone + Debug,
|
|
||||||
{
|
|
||||||
let mut operations: Vec<Operation<T>> = Vec::with_capacity(simple_operations.len());
|
|
||||||
let mut order = 0;
|
|
||||||
|
|
||||||
for simple_operation in simple_operations {
|
|
||||||
match simple_operation {
|
|
||||||
SimpleOperation::Equal { length } => {
|
|
||||||
let original_characters: String =
|
|
||||||
original_text.chars().skip(order).take(length).collect();
|
|
||||||
|
|
||||||
let original_tokens = tokenizer(&original_characters);
|
|
||||||
for token in original_tokens {
|
|
||||||
operations
|
|
||||||
.push(Operation::create_equal(order, token.get_original_length()));
|
|
||||||
order += token.get_original_length();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleOperation::Insert { text } => {
|
|
||||||
let tokens = tokenizer(&text);
|
|
||||||
operations.push(Operation::create_insert(order, tokens));
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleOperation::Delete { length } => {
|
|
||||||
operations.push(Operation::create_delete(order, length));
|
|
||||||
order += length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
operations
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Serializes an operation as a bare scalar: `Equal` as a non-negative
/// integer, `Insert` as a string and `Delete` as a negated integer.
///
/// # Errors
///
/// Returns a serializer error when a length cannot be represented in the
/// target integer type, instead of silently writing a clamped (and therefore
/// wrong) length.
#[cfg(feature = "serde")]
impl Serialize for SimpleOperation {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // neat idea from https://github.com/spebern/operational-transform-rs/blob/9faa17f0a2b282ac2e09dbb2d29fdaf2ae0bbb4a/operational-transform/src/serde.rs#L14
        match self {
            SimpleOperation::Equal { length } => {
                let length = u64::try_from(*length).map_err(|_| {
                    <S::Error as serde::ser::Error>::custom("equal length does not fit in u64")
                })?;
                serializer.serialize_u64(length)
            }
            SimpleOperation::Insert { text } => serializer.serialize_str(text),
            SimpleOperation::Delete { length } => {
                let length = i64::try_from(*length).map_err(|_| {
                    <S::Error as serde::ser::Error>::custom("delete length does not fit in i64")
                })?;
                // `length` is non-negative here, so the negation cannot overflow.
                serializer.serialize_i64(-length)
            }
        }
    }
}
|
|
||||||
|
|
||||||
/// Deserializes an operation from a bare scalar: an unsigned integer is an
/// `Equal`, a string is an `Insert`, and a signed integer that is zero or
/// negative is a `Delete` of its magnitude.
///
/// A strictly positive value delivered through `visit_i64` is read as an
/// `Equal`, because it cannot be a negated delete length.
///
/// # Errors
///
/// Fails with an "invalid value" error when a length does not fit in `usize`,
/// rather than clamping it to `usize::MAX`.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for SimpleOperation {
    fn deserialize<D>(deserializer: D) -> Result<SimpleOperation, D::Error>
    where
        D: Deserializer<'de>,
    {
        use std::fmt;

        struct OperationVisitor;

        impl Visitor<'_> for OperationVisitor {
            type Value = SimpleOperation;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("an integer between -2^63 and 2^64-1 or a string")
            }

            fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                usize::try_from(value)
                    .map(|length| SimpleOperation::Equal { length })
                    .map_err(|_| E::invalid_value(de::Unexpected::Unsigned(value), &self))
            }

            fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                // `unsigned_abs` is total, unlike `-value`, which overflows for
                // `i64::MIN`.
                let length = usize::try_from(value.unsigned_abs())
                    .map_err(|_| E::invalid_value(de::Unexpected::Signed(value), &self))?;

                Ok(if value > 0 {
                    SimpleOperation::Equal { length }
                } else {
                    SimpleOperation::Delete { length }
                })
            }

            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
            where
                E: de::Error,
            {
                Ok(SimpleOperation::Insert {
                    text: value.to_owned(),
                })
            }
        }

        deserializer.deserialize_any(OperationVisitor)
    }
}
|
|
||||||
|
|
||||||
/// A serializable representation of the changes made to a text document
|
|
||||||
/// without the original text.
|
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
||||||
#[derive(Debug, Clone, PartialEq, Default)]
|
|
||||||
pub struct ChangeSet {
|
|
||||||
pub operations: Vec<SimpleOperation>,
|
|
||||||
pub cursors: Vec<CursorPosition>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ChangeSet {
|
|
||||||
#[must_use]
|
|
||||||
pub fn new(operations: Vec<SimpleOperation>, cursors: Vec<CursorPosition>) -> Self {
|
|
||||||
Self {
|
|
||||||
operations,
|
|
||||||
cursors,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
pub mod cursor_position;
|
pub mod cursor_position;
|
||||||
pub mod history;
|
pub mod history;
|
||||||
|
pub mod number_or_string;
|
||||||
pub mod side;
|
pub mod side;
|
||||||
pub mod span_with_history;
|
pub mod span_with_history;
|
||||||
pub mod text_with_cursors;
|
pub mod text_with_cursors;
|
||||||
|
|
|
||||||
74
src/types/number_or_string.rs
Normal file
74
src/types/number_or_string.rs
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
use std::fmt::Debug;
|
||||||
|
|
||||||
|
#[cfg(feature = "serde")]
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
#[cfg(feature = "wasm")]
|
||||||
|
use wasm_bindgen::prelude::*;
|
||||||
|
|
||||||
|
/// A value that is either a signed integer or a piece of text.
///
/// With the `serde` feature enabled the enum is untagged, so each variant is
/// (de)serialized as the bare number or string it wraps.
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(untagged)
)]
#[derive(Debug, Clone, PartialEq)]
pub enum NumberOrString {
    /// A signed 64-bit integer.
    Number(i64),
    /// An owned string.
    Text(String),
}
|
||||||
|
|
||||||
|
/// Converts a JavaScript value into a [`NumberOrString`].
///
/// The value is first tried as an `i64`; if that conversion fails it is tried
/// as a `String`. When neither conversion succeeds a
/// [`DeserialisationError`] is returned.
#[cfg(feature = "wasm")]
impl TryFrom<JsValue> for NumberOrString {
    type Error = DeserialisationError;

    fn try_from(value: JsValue) -> Result<Self, Self::Error> {
        // The number attempt works on a clone so the original value is still
        // available for the string attempt.
        i64::try_from(value.clone())
            .map(NumberOrString::Number)
            .or_else(|_| String::try_from(value).map(NumberOrString::Text))
            .map_err(|_| {
                DeserialisationError::new("Could not parse JsValue as either number or string")
            })
    }
}
|
||||||
|
|
||||||
|
/// Converts a [`NumberOrString`] into the JavaScript value of the variant it
/// holds.
#[cfg(feature = "wasm")]
impl From<NumberOrString> for JsValue {
    fn from(value: NumberOrString) -> Self {
        match value {
            NumberOrString::Number(number) => number.into(),
            NumberOrString::Text(string) => string.into(),
        }
    }
}
|
||||||
|
|
||||||
|
/// Error returned when a value cannot be deserialised.
#[cfg(feature = "wasm")]
#[derive(Debug, Clone)]
pub struct DeserialisationError {
    /// Human-readable description of what went wrong.
    pub message: String,
}

#[cfg(feature = "wasm")]
impl DeserialisationError {
    /// Creates an error from anything convertible into a `String`.
    pub fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

/// Formats the error as `Deserialisation error: <message>`.
#[cfg(feature = "wasm")]
impl std::fmt::Display for DeserialisationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Deserialisation error: ")?;
        f.write_str(&self.message)
    }
}

#[cfg(feature = "wasm")]
impl std::error::Error for DeserialisationError {}

/// Hands the error to JavaScript as a string holding only the message.
#[cfg(feature = "wasm")]
impl From<DeserialisationError> for JsValue {
    fn from(error: DeserialisationError) -> Self {
        let DeserialisationError { message } = error;
        JsValue::from_str(message.as_str())
    }
}
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
pub mod common_prefix_len;
|
pub mod common_prefix_len;
|
||||||
pub mod common_suffix_len;
|
pub mod common_suffix_len;
|
||||||
pub mod find_longest_prefix_contained_within;
|
pub mod find_longest_prefix_contained_within;
|
||||||
pub mod is_binary;
|
|
||||||
pub mod myers_diff;
|
pub mod myers_diff;
|
||||||
pub mod string_builder;
|
pub mod string_builder;
|
||||||
|
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
/// Guesses whether `data` holds binary content rather than text.
///
/// Data counts as binary when it contains a NUL byte or is not valid UTF-8.
/// Only text inputs can be reconciled using the crate's functions.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    // NUL is a valid UTF-8 character, but finding one in human-readable text
    // is highly suspicious, so it is treated as a sign of binary data.
    let has_nul_byte = data.iter().any(|&byte| byte == 0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs containing a NUL byte are binary; plain ASCII text is not.
    #[test]
    fn test_is_binary() {
        let binary_inputs: [&[u8]; 2] = [&[0, 159, 146, 150], &[0, 12]];
        for input in binary_inputs {
            assert!(is_binary(input));
        }

        assert!(!is_binary(b"hello"));
    }
}
|
|
||||||
|
|
@ -87,7 +87,7 @@ struct V {
|
||||||
impl V {
|
impl V {
|
||||||
fn new(max_d: usize) -> Self {
|
fn new(max_d: usize) -> Self {
|
||||||
// max_d should fit in isize for the algorithm to work correctly
|
// max_d should fit in isize for the algorithm to work correctly
|
||||||
let offset = isize::try_from(max_d).unwrap_or(isize::MAX);
|
let offset = isize::try_from(max_d).expect("max_d must fit in isize");
|
||||||
Self {
|
Self {
|
||||||
offset,
|
offset,
|
||||||
v: vec![0; 2 * max_d],
|
v: vec![0; 2 * max_d],
|
||||||
|
|
@ -101,16 +101,15 @@ impl Index<isize> for V {
|
||||||
type Output = usize;
|
type Output = usize;
|
||||||
|
|
||||||
fn index(&self, index: isize) -> &Self::Output {
|
fn index(&self, index: isize) -> &Self::Output {
|
||||||
let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
|
let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize");
|
||||||
&self.v[idx.min(self.v.len().saturating_sub(1))]
|
&self.v[idx]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexMut<isize> for V {
|
impl IndexMut<isize> for V {
|
||||||
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
|
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
|
||||||
let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
|
let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize");
|
||||||
let len = self.v.len();
|
&mut self.v[idx]
|
||||||
&mut self.v[idx.min(len.saturating_sub(1))]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -145,7 +144,8 @@ where
|
||||||
|
|
||||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||||
// `delta` is odd or even.
|
// `delta` is odd or even.
|
||||||
let delta = isize::try_from(n).unwrap_or(isize::MAX) - isize::try_from(m).unwrap_or(isize::MAX);
|
let delta = isize::try_from(n).expect("n must fit in isize")
|
||||||
|
- isize::try_from(m).expect("m must fit in isize");
|
||||||
let odd = delta & 1 == 1;
|
let odd = delta & 1 == 1;
|
||||||
|
|
||||||
// The initial point at (0, -1)
|
// The initial point at (0, -1)
|
||||||
|
|
@ -157,7 +157,7 @@ where
|
||||||
assert!(vf.len() >= d_max);
|
assert!(vf.len() >= d_max);
|
||||||
assert!(vb.len() >= d_max);
|
assert!(vb.len() >= d_max);
|
||||||
|
|
||||||
let d_max_isize = isize::try_from(d_max).unwrap_or(isize::MAX);
|
let d_max_isize = isize::try_from(d_max).expect("d_max must fit in isize");
|
||||||
for d in 0..d_max_isize {
|
for d in 0..d_max_isize {
|
||||||
// Forward path
|
// Forward path
|
||||||
for k in (-d..=d).rev().step_by(2) {
|
for k in (-d..=d).rev().step_by(2) {
|
||||||
|
|
@ -166,7 +166,8 @@ where
|
||||||
} else {
|
} else {
|
||||||
vf[k - 1] + 1
|
vf[k - 1] + 1
|
||||||
};
|
};
|
||||||
let y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
|
let y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k)
|
||||||
|
.expect("x - k must be non-negative and fit in usize");
|
||||||
|
|
||||||
// The coordinate of the start of a snake
|
// The coordinate of the start of a snake
|
||||||
let (x0, y0) = (x, y);
|
let (x0, y0) = (x, y);
|
||||||
|
|
@ -204,7 +205,8 @@ where
|
||||||
} else {
|
} else {
|
||||||
vb[k - 1] + 1
|
vb[k - 1] + 1
|
||||||
};
|
};
|
||||||
let mut y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
|
let mut y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k)
|
||||||
|
.expect("x - k must be non-negative and fit in usize");
|
||||||
|
|
||||||
// The coordinate of the start of a snake
|
// The coordinate of the start of a snake
|
||||||
if x < n && y < m {
|
if x < n && y < m {
|
||||||
|
|
|
||||||
119
src/wasm.rs
119
src/wasm.rs
|
|
@ -3,7 +3,7 @@ use core::str;
|
||||||
|
|
||||||
use wasm_bindgen::prelude::*;
|
use wasm_bindgen::prelude::*;
|
||||||
|
|
||||||
use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors};
|
use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};
|
||||||
|
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
|
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
|
||||||
|
|
@ -32,6 +32,7 @@ pub fn reconcile_with_history(
|
||||||
tokenizer: BuiltinTokenizer,
|
tokenizer: BuiltinTokenizer,
|
||||||
) -> TextWithCursorsAndHistory {
|
) -> TextWithCursorsAndHistory {
|
||||||
set_panic_hook();
|
set_panic_hook();
|
||||||
|
|
||||||
let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
|
let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
|
||||||
let text_with_cursors = reconciled.apply();
|
let text_with_cursors = reconciled.apply();
|
||||||
|
|
||||||
|
|
@ -54,10 +55,6 @@ pub fn reconcile_with_history(
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// The merged document.
|
/// The merged document.
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// If any of the input documents are not valid UTF-8 strings.
|
|
||||||
#[wasm_bindgen(js_name = genericReconcile)]
|
#[wasm_bindgen(js_name = genericReconcile)]
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn generic_reconcile(
|
pub fn generic_reconcile(
|
||||||
|
|
@ -68,51 +65,56 @@ pub fn generic_reconcile(
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
set_panic_hook();
|
set_panic_hook();
|
||||||
|
|
||||||
if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
|
if let (Some(parent), Some(left), Some(right)) = (
|
||||||
right.to_vec()
|
string_or_nothing(parent),
|
||||||
|
string_or_nothing(left),
|
||||||
|
string_or_nothing(right),
|
||||||
|
) {
|
||||||
|
crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer)
|
||||||
|
.apply()
|
||||||
|
.text()
|
||||||
|
.into_bytes()
|
||||||
} else {
|
} else {
|
||||||
crate::reconcile(
|
right.to_vec()
|
||||||
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
|
}
|
||||||
&str::from_utf8(left)
|
}
|
||||||
.expect("left must be valid UTF-8 because it's not binary")
|
|
||||||
.into(),
|
/// WASM wrapper around getting a compact diff representation of two texts as a
|
||||||
&str::from_utf8(right)
|
/// list of numbers and strings.
|
||||||
.expect("right must be valid UTF-8 because it's not binary")
|
#[wasm_bindgen(js_name = diff)]
|
||||||
.into(),
|
#[must_use]
|
||||||
|
pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> {
|
||||||
|
set_panic_hook();
|
||||||
|
|
||||||
|
let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
|
||||||
|
edited_text
|
||||||
|
.to_diff()
|
||||||
|
.into_iter()
|
||||||
|
.map(std::convert::Into::into)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inverse of `diff`, applies a compact diff representation to a parent text
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Panics if the diff format is invalid or there's an integer overflow when
|
||||||
|
/// applying the diff.
|
||||||
|
#[wasm_bindgen(js_name = undiff)]
|
||||||
|
#[must_use]
|
||||||
|
pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {
|
||||||
|
set_panic_hook();
|
||||||
|
|
||||||
|
EditedText::from_diff(
|
||||||
|
parent,
|
||||||
|
diff.into_iter()
|
||||||
|
.map(std::convert::TryInto::try_into)
|
||||||
|
.collect::<Result<_, _>>()
|
||||||
|
.expect("Invalid diff format"),
|
||||||
&*tokenizer,
|
&*tokenizer,
|
||||||
)
|
)
|
||||||
.apply()
|
.apply()
|
||||||
.text()
|
.text()
|
||||||
.into_bytes()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// WASM wrapper around getting a compact diff representation as a JSON string
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// If serialization to JSON fails which should not happen
|
|
||||||
#[wasm_bindgen(js_name = getCompactDiff)]
|
|
||||||
#[must_use]
|
|
||||||
pub fn get_compact_diff(
|
|
||||||
parent: &str,
|
|
||||||
changed: &TextWithCursors,
|
|
||||||
tokenizer: BuiltinTokenizer,
|
|
||||||
) -> String {
|
|
||||||
set_panic_hook();
|
|
||||||
let edited_text = crate::EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
|
|
||||||
let change_set = edited_text.to_change_set();
|
|
||||||
|
|
||||||
serde_json::to_string(&change_set).expect("Failed to serialize change set")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Heuristically determine if the given data is a binary or a text file's
|
|
||||||
/// content.
|
|
||||||
#[wasm_bindgen(js_name = isBinary)]
|
|
||||||
#[must_use]
|
|
||||||
pub fn is_binary(data: &[u8]) -> bool {
|
|
||||||
set_panic_hook();
|
|
||||||
crate::is_binary(data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_panic_hook() {
|
fn set_panic_hook() {
|
||||||
|
|
@ -140,3 +142,30 @@ impl TextWithCursorsAndHistory {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn history(&self) -> Vec<SpanWithHistory> { self.history.clone() }
|
pub fn history(&self) -> Vec<SpanWithHistory> { self.history.clone() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes `data` as UTF-8 text, or returns `None` when it looks binary.
///
/// Data is considered binary when it contains a NUL byte or is not valid
/// UTF-8.
#[must_use]
fn string_or_nothing(data: &[u8]) -> Option<String> {
    match std::str::from_utf8(data) {
        // NUL is valid UTF-8, yet highly suspicious in human-readable text,
        // so such input is rejected as well.
        Ok(text) if !data.contains(&0) => Some(text.to_owned()),
        _ => None,
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs containing a NUL byte yield `None`; plain text is returned as an
    /// owned string.
    #[test]
    fn test_string_or_nothing() {
        let binary_inputs: [&[u8]; 2] = [&[0, 159, 146, 150], &[0, 12]];
        for input in binary_inputs {
            assert_eq!(string_or_nothing(input), None);
        }

        assert_eq!(string_or_nothing(b"hello"), Some("hello".into()));
    }
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ mod example_document;
|
||||||
use std::{fs, path::Path};
|
use std::{fs, path::Path};
|
||||||
|
|
||||||
use example_document::ExampleDocument;
|
use example_document::ExampleDocument;
|
||||||
use reconcile_text::{BuiltinTokenizer, EditedText, reconcile};
|
use reconcile_text::{BuiltinTokenizer, reconcile};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -34,8 +34,11 @@ fn test_document_one_way_with_cursors() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "serde")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_document_one_way_with_cursors_and_serialisation() {
|
fn test_document_one_way_with_serialisation() {
|
||||||
|
use reconcile_text::EditedText;
|
||||||
|
|
||||||
for doc in &get_all_documents() {
|
for doc in &get_all_documents() {
|
||||||
let parent = doc.parent();
|
let parent = doc.parent();
|
||||||
let left_operations =
|
let left_operations =
|
||||||
|
|
@ -47,19 +50,23 @@ fn test_document_one_way_with_cursors_and_serialisation() {
|
||||||
);
|
);
|
||||||
|
|
||||||
let serialised_left =
|
let serialised_left =
|
||||||
serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_change_set()).unwrap())
|
serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let serialised_right = serde_yaml::from_str(
|
let serialised_right =
|
||||||
&serde_yaml::to_string(&right_operations.to_change_set()).unwrap(),
|
serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap())
|
||||||
)
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let restored_left_operations =
|
let restored_left_operations =
|
||||||
EditedText::from_change_set(&parent, serialised_left, &*BuiltinTokenizer::Word);
|
EditedText::from_diff(&parent, serialised_left, &*BuiltinTokenizer::Word);
|
||||||
let restored_right_operations =
|
let restored_right_operations =
|
||||||
EditedText::from_change_set(&parent, serialised_right, &*BuiltinTokenizer::Word);
|
EditedText::from_diff(&parent, serialised_right, &*BuiltinTokenizer::Word);
|
||||||
|
|
||||||
doc.assert_eq(&restored_left_operations.merge(restored_right_operations));
|
doc.assert_eq_without_cursors(
|
||||||
|
&restored_left_operations
|
||||||
|
.merge(restored_right_operations)
|
||||||
|
.apply()
|
||||||
|
.text(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,22 +55,16 @@ fn test_merge_binary() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[wasm_bindgen_test(unsupported = test)]
|
#[wasm_bindgen_test] // JsValue isn't supported outside of wasm
|
||||||
fn test_is_binary() {
|
fn test_diff() {
|
||||||
assert!(is_binary(&[0, 159, 146, 150]));
|
|
||||||
assert!(is_binary(&[0, 12]));
|
|
||||||
assert!(!is_binary(b"hello"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[wasm_bindgen_test(unsupported = test)]
|
|
||||||
fn test_get_compact_diff() {
|
|
||||||
let parent = "hello ";
|
let parent = "hello ";
|
||||||
let changed = "world";
|
let changed = "world";
|
||||||
let result = get_compact_diff(parent, &changed.into(), BuiltinTokenizer::Word);
|
|
||||||
assert_eq!(result, "{\"operations\":[-6,\"world\"],\"cursors\":[]}");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[wasm_bindgen_test(unsupported = test)]
|
let result = diff(parent, &changed.into(), BuiltinTokenizer::Word);
|
||||||
fn test_is_binary_empty() {
|
|
||||||
assert!(!is_binary(b""));
|
assert_eq!(result.len(), 2);
|
||||||
|
let first: i64 = result[0].clone().try_into().unwrap();
|
||||||
|
let second: String = result[1].clone().try_into().unwrap();
|
||||||
|
assert_eq!(first, -6);
|
||||||
|
assert_eq!(second, "world");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue