Remove the exponential API

This commit is contained in:
Andras Schmelczer 2025-06-29 19:03:55 +01:00
parent 0448e30dd9
commit 4fda83fe17
8 changed files with 248 additions and 150 deletions

View file

@ -1,54 +1,49 @@
mod cursor;
mod edited_text;
mod operation;
mod utils;
use std::fmt::Debug;
pub use cursor::{CursorPosition, TextWithCursors};
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::{
Tokenizer,
utils::{history::History, side::Side},
types::{side::Side, text_with_cursors::TextWithCursors},
};
/// Given an `original` document and two concurrent edits to it,
/// return a document containing all changes from both `left`
/// and `right`.
///
/// If a span has been inserted in either the `left` or `right`
/// versions, it will be present in the return value. If both sides
/// insert the same span with a common prefix, that prefix will only
/// be present once in the output.
///
/// Deletes are preserved from both sides. This means that an insert
/// from one side into a deleted span from the other side will result
/// in the removal of the original span but keeping the inserted text.
///
/// The function supports UTF-8. The arguments are tokenized at the
/// granularity of words.
///
/// ```
/// use reconcile::{reconcile, BuiltinTokenizer};
///
/// let parent = "Merging text is hard!";
/// let left = "Merging text is easy!";
/// let right = "With reconcile, merging documents is hard!";
///
/// let deconflicted = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::Word);
/// assert_eq!(deconflicted.apply().text(), "With reconcile, merging documents is easy!");
/// ```
#[must_use]
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
reconcile_with_cursors(original, left.into(), right.into())
.text
.to_string()
}
#[must_use]
pub fn reconcile_with_history(original: &str, left: &str, right: &str) -> Vec<(History, String)> {
let left_operations = EditedText::from_strings(original, left.into(), Side::Left);
let right_operations = EditedText::from_strings(original, right.into(), Side::Right);
left_operations.merge(right_operations).apply_with_history()
}
#[must_use]
pub fn reconcile_with_cursors<'a>(
pub fn reconcile<'a, T>(
original: &'a str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
) -> TextWithCursors<'static> {
let left_operations = EditedText::from_strings(original, left, Side::Left);
let right_operations = EditedText::from_strings(original, right, Side::Right);
let merged_operations = left_operations.merge(right_operations);
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
}
#[must_use]
pub fn reconcile_with_tokenizer<'a, F, T>(
original: &str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
left: &TextWithCursors,
right: &TextWithCursors,
tokenizer: &Tokenizer<T>,
) -> TextWithCursors<'static>
) -> EditedText<'a, T>
where
T: PartialEq + Clone + Debug,
{
@ -57,9 +52,7 @@ where
let right_operations =
EditedText::from_strings_with_tokenizer(original, right, tokenizer, Side::Right);
let merged_operations = left_operations.merge(right_operations);
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
left_operations.merge(right_operations)
}
#[cfg(test)]
@ -70,13 +63,13 @@ mod test {
use test_case::test_matrix;
use super::*;
use crate::CursorPosition;
use crate::{BuiltinTokenizer, CursorPosition, types::text_with_cursors::TextWithCursors};
#[test]
fn test_cursor_complex() {
let original = "this is some complex text to test cursor positions";
let original: &'static str = "this is some complex text to test cursor positions";
let left = TextWithCursors::new(
"this is really complex text for testing cursor positions",
"this is really complex text for testing cursor positions".to_owned(),
vec![
CursorPosition {
id: 0,
@ -89,7 +82,7 @@ mod test {
],
);
let right = TextWithCursors::new(
"that was some complex sample to test cursor movements",
"that was some complex sample to test cursor movements".to_owned(),
vec![
CursorPosition {
id: 2,
@ -102,31 +95,31 @@ mod test {
],
);
let merged = reconcile_with_cursors(original, left, right);
let merged = reconcile(original, &left, &right, &*BuiltinTokenizer::Word).apply();
assert_eq!(
merged,
TextWithCursors::new(
"that was really complex sample for testing cursor movements",
vec![
CursorPosition {
id: 2,
char_index: 5
}, // unchanged
CursorPosition {
id: 0,
char_index: 9
}, // before "really"
CursorPosition {
id: 1,
char_index: 23
}, // inside of "s|ample" because "text" got replaced by "sample"
CursorPosition {
id: 3,
char_index: 30
}, // after "complex sample"
]
)
&merged.text(),
"that was really complex sample for testing cursor movements"
);
assert_eq!(
merged.cursors(),
vec![
CursorPosition {
id: 2,
char_index: 5
}, // unchanged
CursorPosition {
id: 0,
char_index: 9
}, // before "really"
CursorPosition {
id: 1,
char_index: 23
}, // inside of "s|ample" because "text" got replaced by "sample"
CursorPosition {
id: 3,
char_index: 30
}, // after "complex sample"
]
);
}
@ -174,6 +167,11 @@ mod test {
})
.collect::<Vec<_>>();
let _ = reconcile(&contents[0], &contents[1], &contents[2]);
let _ = reconcile(
&contents[0],
&(&contents[1]).into(),
&(&contents[2]).into(),
&*BuiltinTokenizer::Word,
);
}
}

View file

@ -4,13 +4,13 @@ use std::fmt::Debug;
use serde::{Deserialize, Serialize};
use crate::{
CursorPosition, TextWithCursors,
BuiltinTokenizer, CursorPosition, TextWithCursors,
operation_transformation::{
Operation,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
},
raw_operation::RawOperation,
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
tokenizer::Tokenizer,
types::{history::History, side::Side, text_with_history::TextWithHistory},
utils::string_builder::StringBuilder,
};
@ -27,6 +27,7 @@ use crate::{
/// in the original text. The cursor positions are updated when the operations
/// are applied, so that the cursor positions can be used to restore the
/// cursor positions in the updated text.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EditedText<'a, T>
@ -35,7 +36,7 @@ where
{
text: &'a str,
operations: Vec<Operation<T>>,
pub(crate) cursors: Vec<CursorPosition>,
cursors: Vec<CursorPosition>,
}
impl<'a> EditedText<'a, String> {
@ -47,7 +48,7 @@ impl<'a> EditedText<'a, String> {
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer, side)
Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word, side)
}
}
@ -219,14 +220,14 @@ where
/// Apply the operations to the text and return the resulting text.
#[must_use]
pub fn apply(&self) -> String {
pub fn apply(&self) -> TextWithCursors {
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for operation in &self.operations {
builder = operation.apply(builder);
}
builder.take()
TextWithCursors::new(builder.take(), self.cursors.clone())
}
#[must_use]
@ -291,7 +292,7 @@ mod tests {
insta::assert_debug_snapshot!(operations);
let new_right = operations.apply();
assert_eq!(new_right.to_string(), right);
assert_eq!(new_right.text(), right);
}
#[test]
@ -303,7 +304,7 @@ mod tests {
assert_debug_snapshot!(operations);
let new_right = operations.apply();
assert_eq!(new_right.to_string(), text);
assert_eq!(new_right.text(), text);
}
#[test]
@ -317,6 +318,6 @@ mod tests {
let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);
assert_eq!(operations.apply().text(), expected);
}
}

View file

@ -1,7 +1,44 @@
mod word_tokenizer;
use std::ops::Deref;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use token::Token;
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
pub mod token;
pub mod word_tokenizer;
/// Type alias for a tokenizer function object: a `dyn Fn` that takes a
/// string slice and returns a `Vec` of [`Token`]s.
///
/// This is an alias for an unsized trait-object type, not a trait itself,
/// so it has to be used behind a pointer such as `&Tokenizer<T>`.
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
/// Selects one of the tokenizers that ship with the crate.
///
/// This is the definition used when the `wasm` feature is enabled; the
/// variants carry explicit string values for `wasm_bindgen`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(feature = "wasm")]
pub enum BuiltinTokenizer {
/// Character-level tokenizer. Not implemented yet: dereferencing this
/// variant currently hits a `todo!()`.
Character = "Character",
/// Word-level tokenizer, backed by `word_tokenizer::word_tokenizer`.
Word = "Word",
}
/// Selects one of the tokenizers that ship with the crate.
///
/// This is the definition used when the `wasm` feature is disabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg(not(feature = "wasm"))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BuiltinTokenizer {
/// Character-level tokenizer. Not implemented yet: dereferencing this
/// variant currently hits a `todo!()`.
Character,
/// Word-level tokenizer, backed by `word_tokenizer::word_tokenizer`.
Word,
}
/// Lets a [`BuiltinTokenizer`] be used wherever a `&Tokenizer<String>` is
/// expected, by dereferencing it (e.g. `&*BuiltinTokenizer::Word`).
impl Deref for BuiltinTokenizer {
    type Target = Tokenizer<String>;

    /// Resolve the selected variant to its tokenizer function.
    ///
    /// # Panics
    ///
    /// Panics for `Character`, which is still a `todo!()`, and, when the
    /// `wasm` feature is enabled, for the `__Invalid` variant.
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Word => &word_tokenizer::word_tokenizer,
            Self::Character => todo!(),
            #[cfg(feature = "wasm")]
            Self::__Invalid => panic!("Unexpected tokenizer type"),
        }
    }
}

View file

@ -32,9 +32,6 @@ impl TextWithCursors {
#[must_use]
pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
#[must_use]
pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self { Self { text, cursors } }
}
impl<'a> From<&'a str> for TextWithCursors {
@ -45,3 +42,21 @@ impl<'a> From<&'a str> for TextWithCursors {
}
}
}
/// Build a [`TextWithCursors`] from a borrowed `String`: the text is copied
/// and the cursor list starts out empty.
impl From<&String> for TextWithCursors {
    fn from(text: &String) -> Self {
        let cursors = Vec::new();
        let text = text.clone();
        Self { text, cursors }
    }
}
/// Build a [`TextWithCursors`] from an owned `String`: the string is moved in
/// as the text and the cursor list starts out empty.
impl From<String> for TextWithCursors {
    fn from(text: String) -> Self {
        let cursors = Vec::new();
        Self { text, cursors }
    }
}

View file

@ -13,9 +13,40 @@ use core::str;
use wasm_bindgen::prelude::*;
use crate::{
TextWithCursors, TextWithHistory, reconcile, reconcile_with_cursors, reconcile_with_history,
};
use crate::{BuiltinTokenizer, CursorPosition, TextWithCursors, TextWithHistory};
/// WASM binding for [`crate::reconcile`]: merge `left` and `right`, two
/// concurrent edits of `parent`, and return the merged text together with
/// its cursors.
///
/// `tokenizer` selects which built-in tokenizer is used; it is dereferenced
/// into the tokenizer function that `crate::reconcile` expects.
#[wasm_bindgen(js_name = reconcile)]
#[must_use]
pub fn reconcile(
    parent: &str,
    left: &TextWithCursors,
    right: &TextWithCursors,
    tokenizer: BuiltinTokenizer,
) -> TextWithCursors {
    set_panic_hook();

    let merged = crate::reconcile(parent, left, right, &*tokenizer);
    merged.apply()
}
/// WASM binding for [`crate::reconcile`] that returns the merged text and
/// cursors together with the history produced by `apply_with_history`.
///
/// `tokenizer` selects which built-in tokenizer is used; it is dereferenced
/// into the tokenizer function that `crate::reconcile` expects.
#[wasm_bindgen(js_name = reconcileWithHistory)]
#[must_use]
pub fn reconcile_with_history(
    parent: &str,
    left: &TextWithCursors,
    right: &TextWithCursors,
    tokenizer: BuiltinTokenizer,
) -> TextWithCursorsAndHistory {
    set_panic_hook();

    let merged = crate::reconcile(parent, left, right, &*tokenizer);
    // Both results are derived from the same merged operations.
    let text_with_cursors = merged.apply();
    let history = merged.apply_with_history();

    TextWithCursorsAndHistory {
        text_with_cursors,
        history,
    }
}
/// Merge two documents with a common parent. Relies on `reconcile::reconcile`
/// for texts and returns the right document as-is if either of the updated
@ -34,56 +65,35 @@ use crate::{
/// # Panics
///
/// If any of the input documents are not valid UTF-8 strings.
#[wasm_bindgen]
#[wasm_bindgen(js_name = genericReconcile)]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
pub fn generic_reconcile(
parent: &[u8],
left: &[u8],
right: &[u8],
tokenizer: BuiltinTokenizer,
) -> Vec<u8> {
set_panic_hook();
if is_binary(parent) || is_binary(left) || is_binary(right) {
if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
right.to_vec()
} else {
reconcile(
crate::reconcile(
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary"),
str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary"),
&str::from_utf8(left)
.expect("left must be valid UTF-8 because it's not binary")
.into(),
&str::from_utf8(right)
.expect("right must be valid UTF-8 because it's not binary")
.into(),
&*tokenizer,
)
.apply()
.text()
.into_bytes()
}
}
/// WASM wrapper around `reconcile` for merging text.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
reconcile(parent, left, right)
}
/// WASM wrapper around `reconcile` for merging text.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<TextWithHistory> {
set_panic_hook();
reconcile_with_history(parent, left, right)
.into_iter()
.collect()
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: &TextWithCursors,
right: &TextWithCursors,
) -> TextWithCursors {
set_panic_hook();
reconcile_with_cursors(parent, left, right)
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
#[wasm_bindgen(js_name = isBinary)]
@ -98,3 +108,22 @@ fn set_panic_hook() {
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}
/// Result of `reconcile_with_history`: the merged text with its cursors,
/// plus the history obtained from the same merge.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursorsAndHistory {
// Merged text and cursor positions, as returned by `EditedText::apply`.
text_with_cursors: TextWithCursors,
// History entries, as returned by `EditedText::apply_with_history`.
history: Vec<TextWithHistory>,
}
/// Accessors exposed through `wasm_bindgen`. Each returns an owned value.
#[wasm_bindgen]
impl TextWithCursorsAndHistory {
    /// The merged text, as reported by the wrapped `TextWithCursors`.
    #[must_use]
    pub fn text(&self) -> String {
        self.text_with_cursors.text()
    }

    /// The cursor positions, as reported by the wrapped `TextWithCursors`.
    #[must_use]
    pub fn cursors(&self) -> Vec<CursorPosition> {
        self.text_with_cursors.cursors()
    }

    /// A copy of the stored history entries.
    #[must_use]
    pub fn history(&self) -> Vec<TextWithHistory> {
        self.history.clone()
    }
}