Improve docs and compare with alternatives
This commit is contained in:
parent
5962feb90a
commit
3d382ad741
14 changed files with 106 additions and 69 deletions
|
|
@ -59,7 +59,7 @@
|
|||
//!
|
||||
//! For specialised use cases, such as structured languages, custom
|
||||
//! tokenisation logic can be implemented by providing a function with the
|
||||
//! signature `Fn(&str) -> Vec<Token<String>>`::
|
||||
//! signature `Fn(&str) -> Vec<Token<String>>`:
|
||||
//!
|
||||
//! ```
|
||||
//! use reconcile_text::{reconcile, Token, BuiltinTokenizer};
|
||||
|
|
@ -151,10 +151,11 @@
|
|||
//! ]
|
||||
//! );
|
||||
//! ```
|
||||
//!
|
||||
//! ## Efficiently serialize changes
|
||||
//!
|
||||
//! The edits can be serialized into a compact representation without the full
|
||||
//! original text, making the size only depends on the changes made.
|
||||
//! original text, making the size depend only on the changes made.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #[cfg(feature = "serde")]
|
||||
|
|
|
|||
|
|
@ -18,18 +18,16 @@ use crate::{
|
|||
utils::string_builder::StringBuilder,
|
||||
};
|
||||
|
||||
/// A text document and a sequence of operations that can be applied to the text
|
||||
/// document. `EditedText` supports merging two sequences of operations using
|
||||
/// the principles of Operational Transformation.
|
||||
/// A text document with a sequence of operations derived from diffing it
|
||||
/// against an updated version. Supports merging two `EditedText` instances
|
||||
/// (from the same original) via Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the `from_strings` method, then merged with
|
||||
/// another `EditedText` derived from the same original text and then applied to
|
||||
/// the original text to get the reconciled text of concurrent edits.
|
||||
/// Created via `from_strings`, `from_strings_with_tokenizer`, or `from_diff`,
|
||||
/// then merged with another `EditedText` and applied to get the reconciled
|
||||
/// text.
|
||||
///
|
||||
/// In addition to text and operations, it also keeps track of cursor positions
|
||||
/// in the original text. The cursor positions are updated when the operations
|
||||
/// are applied, so that the cursor positions can be used to restore the
|
||||
/// cursor positions in the updated text.
|
||||
/// Also tracks cursor positions from the updated text, repositioning them
|
||||
/// when operations are applied.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
|
|
@ -43,12 +41,8 @@ where
|
|||
}
|
||||
|
||||
impl<'a> EditedText<'a, String> {
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The default
|
||||
/// word tokenizer is used to tokenize the text which splits the text on
|
||||
/// whitespaces.
|
||||
/// Create an `EditedText` from the given original and updated strings.
|
||||
/// Uses the default word tokenizer (splits on word boundaries).
|
||||
#[must_use]
|
||||
pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word)
|
||||
|
|
@ -59,11 +53,8 @@ impl<'a, T> EditedText<'a, T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The tokenizer
|
||||
/// function is used to tokenize the text.
|
||||
/// Create an `EditedText` from the given original and updated strings
|
||||
/// using the provided tokenizer.
|
||||
pub fn from_strings_with_tokenizer(
|
||||
original: &'a str,
|
||||
updated: &TextWithCursors,
|
||||
|
|
@ -110,7 +101,7 @@ where
|
|||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow (in i64) when calculating new
|
||||
/// Panics if there's an integer overflow (in isize) when calculating new
|
||||
/// cursor positions.
|
||||
#[must_use]
|
||||
#[allow(clippy::too_many_lines)]
|
||||
|
|
@ -280,7 +271,7 @@ where
|
|||
/// Apply the operations to the text and return the resulting text in chunks
|
||||
/// together with the provenance describing where each chunk came from.
|
||||
///
|
||||
/// The result includes deleted spans as well.
|
||||
/// Returns all spans, including deleted ones that are not present in the merged text.
|
||||
///
|
||||
/// ```
|
||||
/// use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile};
|
||||
|
|
@ -422,7 +413,7 @@ where
|
|||
result
|
||||
}
|
||||
|
||||
/// Deserialize an `EditedText` from a change list and the original text.
|
||||
/// Reconstruct an `EditedText` from a diff and the original text.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
|
|
|
|||
|
|
@ -46,9 +46,8 @@ impl<T> Operation<T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
/// Creates an equal operation with the given index.
|
||||
/// This operation is used to indicate that the text at the given index
|
||||
/// is unchanged.
|
||||
/// Creates an equal (retain) operation starting at the given character
|
||||
/// offset in the original text.
|
||||
pub fn create_equal(order: usize, length: usize) -> Self {
|
||||
Operation::Equal {
|
||||
order,
|
||||
|
|
@ -69,13 +68,14 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Creates an insert operation with the given index and text.
|
||||
/// Creates an insert operation at the given character offset with the
|
||||
/// given tokens.
|
||||
pub fn create_insert(order: usize, text: Vec<Token<T>>) -> Self {
|
||||
Operation::Insert { order, text }
|
||||
}
|
||||
|
||||
/// Creates a delete operation with the given index and number of
|
||||
/// to-be-deleted characters.
|
||||
/// Creates a delete operation at the given character offset for the
|
||||
/// specified number of characters.
|
||||
pub fn create_delete(order: usize, deleted_character_count: usize) -> Self {
|
||||
Operation::Delete {
|
||||
order,
|
||||
|
|
@ -179,8 +179,8 @@ where
|
|||
builder
|
||||
}
|
||||
|
||||
/// Returns the number of affected characters. It is always greater than 0
|
||||
/// because empty operations cannot be created.
|
||||
/// Returns the number of affected characters. May be 0 after
|
||||
/// `merge_operations`.
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { length, .. } => *length,
|
||||
|
|
@ -192,10 +192,9 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Merges the operation with the given context, producing a new operation
|
||||
/// and updating the context. This implements a comples FSM that handles
|
||||
/// the merging of operations in a way that is consistent with the text.
|
||||
/// The contexts are updated in-place.
|
||||
/// Adjusts this operation based on `previous_operation` from the other side
|
||||
/// to avoid duplicating or conflicting changes. Updates
|
||||
/// `previous_operation` in-place.
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge_operations(self, previous_operation: &mut Option<Self>) -> Operation<T> {
|
||||
let operation = self;
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ use std::fmt::Debug;
|
|||
|
||||
use crate::{tokenizer::token::Token, utils::myers_diff::myers_diff};
|
||||
|
||||
/// Text editing operation containing the to-be-changed `Tokens`-s.
|
||||
/// Text editing operation containing the affected tokens.
|
||||
///
|
||||
/// `RawOperations` can be joined together when the underlying tokens
|
||||
/// `RawOperation`s can be joined together when the underlying tokens
|
||||
/// allow for joining subsequent operations.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation<T>
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ use wasm_bindgen::prelude::*;
|
|||
|
||||
pub mod token;
|
||||
|
||||
/// A trait for tokenizers that take a string and return a list of tokens.
|
||||
/// Type alias for tokenizer functions that split a string into tokens.
|
||||
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
|
|
|
|||
|
|
@ -3,13 +3,11 @@ use std::fmt::Debug;
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A token is a string that has been normalized in some way.
|
||||
/// A token with a normalized form (used for diffing) and an original form
|
||||
/// (used when applying operations). Joinability flags control whether
|
||||
/// adjacent insertions interleave or group.
|
||||
///
|
||||
/// A token consists of the normalized form is used for comparison, and the
|
||||
/// original form used for subsequently applying `Operation`-s to a text
|
||||
/// document.
|
||||
///
|
||||
/// It's UTF-8 compatible.
|
||||
/// UTF-8 compatible.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ pub enum History {
|
|||
RemovedFromRight = "RemovedFromRight",
|
||||
}
|
||||
|
||||
/// Simple enum for describing the result of `reconcile` in a flat list.
|
||||
/// When compiled to WASM, the enum values are the same as their names.
|
||||
/// Provenance label for each span returned by `apply_with_history`.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[cfg(not(feature = "wasm"))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
|
|
|||
|
|
@ -27,6 +27,10 @@ impl TryFrom<JsValue> for NumberOrText {
|
|||
}
|
||||
|
||||
if let Some(num) = value.clone().as_f64() {
|
||||
if num.is_nan() {
|
||||
return Err(DeserialisationError::new("NaN is not a valid number"));
|
||||
}
|
||||
|
||||
if num.abs() > INTEGRAL_LIMIT {
|
||||
return Err(DeserialisationError::new(
|
||||
"Floating-point number exceeds safe integer limit, use BigInt instead",
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@ use wasm_bindgen::prelude::*;
|
|||
|
||||
use crate::types::history::History;
|
||||
|
||||
/// Wrapper type for `(String, History)` where History describes the origin of
|
||||
/// `text`.
|
||||
/// A text span annotated with its origin in a merge result.
|
||||
#[allow(clippy::unsafe_derive_deserialize)]
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
|
|
|||
|
|
@ -12,12 +12,15 @@ pub struct TextWithCursors {
|
|||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
impl TextWithCursors {
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if any cursor's `char_index` exceeds the text's character length.
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
|
||||
#[must_use]
|
||||
pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
|
||||
let length = text.chars().count();
|
||||
for cursor in &cursors {
|
||||
debug_assert!(
|
||||
assert!(
|
||||
cursor.char_index <= length,
|
||||
// cursor.char_index == length means that the cursor is at the end
|
||||
"Cursor positions ({}) must be contained within the text (of length {length}) or \
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
use std::{fmt, iter::Iterator};
|
||||
|
||||
/// A helper for building a string in-order based on an original string and a
|
||||
/// series of insertions, deletions, and copies applied to it. It is safe to use
|
||||
/// with UTF-8 strings as all operations are based on character indices. The
|
||||
/// methods must be called in-order.
|
||||
/// A helper for building a string sequentially from an original string via
|
||||
/// insertions, deletions, and copies. All operations use character counts,
|
||||
/// which keeps them safe for UTF-8. Methods must be called in order.
|
||||
pub struct StringBuilder<'a> {
|
||||
original: Box<dyn Iterator<Item = char> + 'a>,
|
||||
buffer: String,
|
||||
|
|
|
|||
10
src/wasm.rs
10
src/wasm.rs
|
|
@ -22,7 +22,7 @@ pub fn reconcile(
|
|||
crate::reconcile(parent, left, right, &*tokenizer).apply()
|
||||
}
|
||||
|
||||
/// WASM wrapper around `crate::reconcile` for merging text.
|
||||
/// WASM wrapper around `crate::reconcile` that also returns provenance history.
|
||||
#[wasm_bindgen(js_name = reconcileWithHistory)]
|
||||
#[must_use]
|
||||
pub fn reconcile_with_history(
|
||||
|
|
@ -94,12 +94,12 @@ pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Inverse of `diff`, applies a compact diff representation to a parent text
|
||||
/// Inverse of `diff`, applies a compact diff representation to a parent text.
|
||||
///
|
||||
/// # Panics
|
||||
/// # Errors
|
||||
///
|
||||
/// Panics if the diff format is invalid or there's an integer overflow when
|
||||
/// applying the diff.
|
||||
/// Returns a JS error if the diff format is invalid or references ranges
|
||||
/// exceeding the original text length.
|
||||
#[wasm_bindgen(js_name = undiff)]
|
||||
#[must_use]
|
||||
pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue