Unify WASM and Rust API types

This commit is contained in:
Andras Schmelczer 2025-06-29 17:42:37 +01:00
parent b18a692d46
commit 5378ffb547
16 changed files with 252 additions and 301 deletions

View file

@ -1,57 +0,0 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Position of an identifiable cursor inside a text document.
///
/// The position is stored as a character index into the document (the
/// original author describes it as a "(UTF-8) character index").
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
    /// Identifier that distinguishes this cursor from other cursors.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

impl CursorPosition {
    /// Returns a cursor with the same `id` placed at `index`.
    ///
    /// `self` is left untouched; a new value is returned.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        let mut moved = self.clone();
        moved.char_index = index;
        moved
    }
}
/// A text together with the cursors placed in it.
///
/// The text is stored as a [`Cow`], so a value can either borrow the text
/// from the caller or own its buffer.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
    /// The document's content.
    pub text: Cow<'a, str>,
    /// Cursors associated with `text`.
    pub cursors: Vec<CursorPosition>,
}

impl<'a> TextWithCursors<'a> {
    /// Builds a value that borrows `text` for the lifetime `'a`.
    #[must_use]
    pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Borrowed(text);
        Self { text, cursors }
    }

    /// Builds a value that takes ownership of `text`.
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Owned(text);
        Self { text, cursors }
    }
}
impl<'a> From<&'a str> for TextWithCursors<'a> {
fn from(text: &'a str) -> Self {
Self {
text: text.into(),
cursors: Vec::new(),
}
}
}

View file

@ -3,10 +3,11 @@ use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors};
use crate::{
operation_transformation::utils::{
cook_operations::cook_operations, elongate_operations::elongate_operations,
CursorPosition, TextWithCursors,
operation_transformation::{
Operation,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
},
raw_operation::RawOperation,
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
@ -45,7 +46,7 @@ impl<'a> EditedText<'a, String> {
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>, side: Side) -> Self {
pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer, side)
}
}
@ -61,19 +62,19 @@ where
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: TextWithCursors<'a>,
updated: &TextWithCursors,
tokenizer: &Tokenizer<T>,
side: Side,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
let updated_tokens = (tokenizer)(&updated.text());
let diff: Vec<RawOperation<T>> = RawOperation::vec_from(&original_tokens, &updated_tokens);
Self::new(
original,
cook_operations(elongate_operations(diff), side).collect(),
updated.cursors,
updated.cursors(),
)
}
@ -239,11 +240,11 @@ where
match operation {
Operation::Equal { .. } => {
history.push(TextWithHistory::new(History::Unchanged, builder.take()))
history.push(TextWithHistory::new(History::Unchanged, builder.take()));
}
Operation::Insert { side, .. } => match side {
Side::Left => {
history.push(TextWithHistory::new(History::AddedFromLeft, builder.take()))
history.push(TextWithHistory::new(History::AddedFromLeft, builder.take()));
}
Side::Right => history.push(TextWithHistory::new(
History::AddedFromRight,
@ -259,10 +260,10 @@ where
let deleted = self.text[*order..*order + *deleted_character_count].to_string();
match side {
Side::Left => {
history.push(TextWithHistory::new(History::RemovedFromLeft, deleted))
history.push(TextWithHistory::new(History::RemovedFromLeft, deleted));
}
Side::Right => {
history.push(TextWithHistory::new(History::RemovedFromRight, deleted))
history.push(TextWithHistory::new(History::RemovedFromRight, deleted));
}
}
}
@ -285,7 +286,7 @@ mod tests {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right.into(), Side::Right);
let operations = EditedText::from_strings(left, &right.into(), Side::Right);
insta::assert_debug_snapshot!(operations);
@ -297,7 +298,7 @@ mod tests {
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text.into(), Side::Right);
let operations = EditedText::from_strings(text, &text.into(), Side::Right);
assert_debug_snapshot!(operations);
@ -312,8 +313,8 @@ mod tests {
let right = "Hello world! How are you?";
let expected = "Hello world! How are you? I'm Andras.";
let operations_1 = EditedText::from_strings(original, left.into(), Side::Left);
let operations_2 = EditedText::from_strings(original, right.into(), Side::Right);
let operations_1 = EditedText::from_strings(original, &left.into(), Side::Left);
let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);

View file

@ -1,6 +1,6 @@
use std::fmt::Debug;
use crate::{operation_transformation::Operation, raw_operation::RawOperation, utils::side::Side};
use crate::{operation_transformation::Operation, raw_operation::RawOperation, types::side::Side};
/// Turn raw operations into ordered operations while keeping track of the
/// original token's indexes.

View file

@ -4,7 +4,7 @@ use crate::{tokenizer::token::Token, utils::myers_diff::myers_diff};
/// Text editing operation containing the to-be-changed `Tokens`-s.
///
/// RawOperations can be joined together when the underlying tokens
/// `RawOperations` can be joined together when the underlying tokens
/// allow for joining subsequent operations.
#[derive(Debug, Clone, PartialEq)]
pub enum RawOperation<T>

View file

@ -1,3 +1,5 @@
pub mod cursor_position;
pub mod history;
pub mod side;
pub mod text_with_cursors;
pub mod text_with_history;

View file

@ -0,0 +1,36 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
/// Position of an identifiable cursor in a text document, expressed as a
/// character index into that text.
///
/// Equality is total (both fields are `usize`), so the type implements `Eq`
/// in addition to `PartialEq`.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CursorPosition {
    /// Identifier that distinguishes this cursor from other cursors.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl CursorPosition {
    /// Creates a cursor with the given `id` located at `char_index`.
    ///
    /// Marked as the `wasm_bindgen` constructor when the `wasm` feature is
    /// enabled.
    #[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
    #[must_use]
    pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }

    /// Returns a new cursor with the same `id` placed at `index`; `self` is
    /// not modified.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        Self {
            id: self.id,
            char_index: index,
        }
    }

    /// Returns the cursor's identifier.
    #[must_use]
    pub fn id(&self) -> usize { self.id }

    /// Returns the cursor's character index.
    ///
    /// Named `characterPosition` on the JS side when the `wasm` feature is
    /// enabled.
    #[cfg_attr(feature = "wasm", wasm_bindgen(js_name = characterPosition))]
    #[must_use]
    pub fn char_index(&self) -> usize { self.char_index }
}

View file

@ -0,0 +1,47 @@
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
use crate::types::cursor_position::CursorPosition;
/// A text together with the cursors placed in it.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors {
    // wasm-pack doesn't support generics so we can't use Cow here
    text: String,
    cursors: Vec<CursorPosition>,
}

#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl TextWithCursors {
    /// Creates a `TextWithCursors` from an owned text and its cursors.
    ///
    /// Marked as the `wasm_bindgen` constructor when the `wasm` feature is
    /// enabled.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if any cursor's
    /// `char_index` is greater than the number of characters in `text`.
    /// Without debug assertions no validation is performed.
    #[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
    #[must_use]
    pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
        // The check below only exists under debug assertions, so the O(n)
        // character count is skipped entirely otherwise.
        if cfg!(debug_assertions) {
            let length = text.chars().count();
            for cursor in &cursors {
                // `char_index == length` means that the cursor is at the end.
                assert!(
                    cursor.char_index <= length,
                    "Cursor positions must be contained within the text or just after the end"
                );
            }
        }

        Self { text, cursors }
    }

    /// Returns a copy of the text.
    #[must_use]
    pub fn text(&self) -> String { self.text.clone() }

    /// Returns a copy of the cursors.
    #[must_use]
    pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }

    /// Creates a `TextWithCursors` without validating the cursor positions,
    /// unlike [`TextWithCursors::new`].
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self { Self { text, cursors } }
}
/// Converts a string slice into a `TextWithCursors` holding a copy of the
/// text and no cursors.
impl From<&str> for TextWithCursors {
    fn from(text: &str) -> Self {
        Self {
            text: text.to_owned(),
            cursors: Vec::new(),
        }
    }
}

View file

@ -5,7 +5,7 @@ use wasm_bindgen::prelude::*;
use crate::types::history::History;
/// Wrapper type to expose `(History, String)` to JS.
/// Wrapper type for `(History, String)`
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
@ -16,6 +16,7 @@ pub struct TextWithHistory {
#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl TextWithHistory {
#[must_use]
pub fn new(history: History, text: String) -> Self { TextWithHistory { history, text } }
#[must_use]

View file

@ -1,5 +1,6 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod is_binary;
pub mod myers_diff;
pub mod string_builder;

24
src/utils/is_binary.rs Normal file
View file

@ -0,0 +1,24 @@
/// Heuristic check for whether `data` is the content of a binary file rather
/// than of a text file.
///
/// Returns `true` when `data` contains a NUL byte or is not valid UTF-8, and
/// `false` otherwise.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    // Even though the NUL character is valid in UTF-8, it's highly suspicious
    // in human-readable text.
    let has_nul_byte = data.iter().any(|&byte| byte == 0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Data containing a NUL byte is binary; plain ASCII text is not.
    #[test]
    fn test_is_binary() {
        assert!(is_binary(&[0, 159, 146, 150]));
        assert!(is_binary(&[0, 12]));
        assert!(!is_binary(b"hello"));
    }

    /// Invalid UTF-8 must be detected even when no NUL byte is present, so
    /// that the UTF-8 validation branch is exercised on its own.
    #[test]
    fn test_is_binary_invalid_utf8_without_nul() {
        assert!(is_binary(&[159, 146, 150]));
        assert!(is_binary(&[0xff, 0xfe]));
    }

    /// A NUL byte inside otherwise valid text still marks the data as binary.
    #[test]
    fn test_is_binary_nul_in_valid_text() {
        assert!(is_binary(b"hel\0lo"));
    }

    /// Empty input and multi-byte UTF-8 text are treated as text.
    #[test]
    fn test_is_binary_text_edge_cases() {
        assert!(!is_binary(b""));
        assert!(!is_binary("héllo wörld".as_bytes()));
    }
}

View file

@ -1,2 +1,100 @@
pub mod lib;
pub mod types;
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `errors`: Contains error types used in this crate.
use core::str;
use wasm_bindgen::prelude::*;
use crate::{
TextWithCursors, TextWithHistory, reconcile, reconcile_with_cursors, reconcile_with_history,
};
/// Merge two documents with a common parent.
///
/// Text documents are merged with `reconcile`. If `parent`, `left`, or
/// `right` is classified as binary by `is_binary`, no merge is attempted and
/// a copy of `right` is returned as-is.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document as bytes, or a copy of `right` when any of the three
/// inputs is binary.
///
/// # Panics
///
/// Only if an input that `is_binary` did not classify as binary turns out
/// not to be valid UTF-8. The conversions below rely on `is_binary`
/// reporting such inputs as binary, so a panic here would indicate a bug in
/// that check rather than bad caller input.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
    set_panic_hook();

    // Checked in the order parent, left, right; stops at the first binary one.
    if [parent, left, right]
        .iter()
        .any(|document| is_binary(document))
    {
        return right.to_vec();
    }

    let parent =
        str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary");
    let left = str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary");
    let right = str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary");

    reconcile(parent, left, right).into_bytes()
}
/// WASM wrapper around `reconcile` for merging text.
///
/// Calls `set_panic_hook` first and then returns the result of
/// `reconcile(parent, left, right)`. Named `mergeText` on the JS side.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
reconcile(parent, left, right)
}
/// WASM wrapper around `reconcile_with_history` for merging text.
///
/// Calls `set_panic_hook` first, then collects the items produced by
/// `reconcile_with_history(parent, left, right)` into a
/// `Vec<TextWithHistory>`. Named `mergeTextWithHistory` on the JS side.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<TextWithHistory> {
set_panic_hook();
// NOTE(review): if `reconcile_with_history` already returns a
// `Vec<TextWithHistory>`, the `into_iter().collect()` round-trip below is a
// no-op and could be dropped — confirm against its signature.
reconcile_with_history(parent, left, right)
.into_iter()
.collect()
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
///
/// Calls `set_panic_hook` first and then returns the result of
/// `reconcile_with_cursors(parent, left, right)`; `left` and `right` are
/// passed through by reference. Named `mergeTextWithCursors` on the JS side.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: &TextWithCursors,
right: &TextWithCursors,
) -> TextWithCursors {
set_panic_hook();
reconcile_with_cursors(parent, left, right)
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
///
/// Calls `set_panic_hook` first and then delegates to `crate::is_binary`.
/// Named `isBinary` on the JS side.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
set_panic_hook();
crate::is_binary(data)
}
/// Calls `console_error_panic_hook::set_once()` when the
/// `console_error_panic_hook` feature is enabled; otherwise the body is empty
/// and the function does nothing.
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -1,106 +0,0 @@
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `errors`: Contains error types used in this crate.
use core::str;
use wasm_bindgen::prelude::*;
use crate::wasm::types::{JsTextWithCursors, JsTextWithHistory};
/// Merge two documents with a common parent.
///
/// Text documents are merged with `crate::reconcile`. If `parent`, `left`,
/// or `right` is classified as binary by `is_binary` (it contains a NUL byte
/// or is not valid UTF-8), no merge is attempted and a copy of `right` is
/// returned as-is.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document as bytes, or a copy of `right` when any of the three
/// inputs is binary.
///
/// # Panics
///
/// Not expected for any input: invalid UTF-8 is classified as binary by
/// `is_binary`, so the conversions below only run on data that already
/// passed UTF-8 validation. A panic here would indicate a bug in that check.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
    set_panic_hook();

    // Checked in the order parent, left, right; stops at the first binary one.
    if [parent, left, right]
        .iter()
        .any(|document| is_binary(document))
    {
        return right.to_vec();
    }

    let parent =
        str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary");
    let left = str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary");
    let right = str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary");

    crate::reconcile(parent, left, right).into_bytes()
}
/// WASM wrapper around `crate::reconcile` for merging text.
///
/// Calls `set_panic_hook` first and then returns the result of
/// `crate::reconcile(parent, left, right)`. Named `mergeText` on the JS side.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
crate::reconcile(parent, left, right)
}
/// WASM wrapper around `crate::reconcile_with_history` for merging text.
///
/// Calls `set_panic_hook` first, then converts every item produced by
/// `crate::reconcile_with_history(parent, left, right)` with `Into::into` and
/// collects the results into a `Vec<JsTextWithHistory>`. Named
/// `mergeTextWithHistory` on the JS side.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<JsTextWithHistory> {
set_panic_hook();
crate::reconcile_with_history(parent, left, right)
.into_iter()
.map(Into::into)
.collect()
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
///
/// Calls `set_panic_hook` first. `left` and `right` are taken by value and
/// converted with `.into()` before being passed to
/// `crate::reconcile_with_cursors`; its result is converted back with
/// `.into()` to `JsTextWithCursors`. Named `mergeTextWithCursors` on the JS
/// side.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: JsTextWithCursors,
right: JsTextWithCursors,
) -> JsTextWithCursors {
set_panic_hook();
crate::reconcile_with_cursors(parent, left.into(), right.into()).into()
}
/// Heuristic check for whether `data` is the content of a binary file rather
/// than of a text file.
///
/// Calls `set_panic_hook` first. Returns `true` when `data` contains a NUL
/// byte or is not valid UTF-8, and `false` otherwise. Named `isBinary` on the
/// JS side.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    set_panic_hook();

    // Even though the NUL character is valid in UTF-8, it's highly suspicious
    // in human-readable text.
    let has_nul_byte = data.iter().any(|&byte| byte == 0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
/// Calls `console_error_panic_hook::set_once()` when the
/// `console_error_panic_hook` feature is enabled; otherwise the body is empty
/// and the function does nothing.
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -1,93 +0,0 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
use crate::History;
/// Wrapper type to expose `TextWithCursors` to JS.
///
/// Holds an owned `String` and a list of `JsCursorPosition`s; both fields are
/// private and reachable only through the accessors below.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct JsTextWithCursors {
text: String,
cursors: Vec<JsCursorPosition>,
}
#[wasm_bindgen]
impl JsTextWithCursors {
/// Creates a wrapper from an owned text and its cursors; no validation is
/// performed. Marked as the `wasm_bindgen` constructor.
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(text: String, cursors: Vec<JsCursorPosition>) -> Self { Self { text, cursors } }
/// Returns a copy of the text.
#[must_use]
pub fn text(&self) -> String { self.text.clone() }
/// Returns a copy of the cursors.
#[must_use]
pub fn cursors(&self) -> Vec<JsCursorPosition> { self.cursors.clone() }
}
impl From<JsTextWithCursors> for crate::TextWithCursors<'_> {
fn from(owned: JsTextWithCursors) -> Self {
crate::TextWithCursors::new_owned(
owned.text.to_string(),
owned
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
)
}
}
impl From<crate::TextWithCursors<'_>> for JsTextWithCursors {
fn from(text_with_cursors: crate::TextWithCursors<'_>) -> Self {
JsTextWithCursors {
text: text_with_cursors.text.into_owned(),
cursors: text_with_cursors
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
}
}
}
/// Wrapper type to expose `CursorPosition` to JS.
///
/// Stores the cursor's `id` and `char_index`; both fields are private and
/// reachable only through the accessors below.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct JsCursorPosition {
id: usize,
char_index: usize,
}
#[wasm_bindgen]
impl JsCursorPosition {
/// Creates a cursor with the given `id` at `char_index`. Marked as the
/// `wasm_bindgen` constructor.
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }
/// Returns the cursor's identifier.
#[must_use]
pub fn id(&self) -> usize { self.id }
/// Returns the cursor's character index. Named `characterPosition` on the
/// JS side.
#[wasm_bindgen(js_name = characterPosition)]
#[must_use]
pub fn char_index(&self) -> usize { self.char_index }
}
impl From<JsCursorPosition> for crate::CursorPosition {
fn from(owned: JsCursorPosition) -> Self {
crate::CursorPosition {
id: owned.id,
char_index: owned.char_index,
}
}
}
impl From<crate::CursorPosition> for JsCursorPosition {
fn from(cursor: crate::CursorPosition) -> Self {
JsCursorPosition {
id: cursor.id,
char_index: cursor.char_index,
}
}
}

View file

@ -21,12 +21,12 @@ impl ExampleDocument {
pub fn parent(&self) -> String { self.parent.clone() }
#[must_use]
pub fn left(&self) -> TextWithCursors<'static> {
pub fn left(&self) -> TextWithCursors {
ExampleDocument::string_to_text_with_cursors(&self.left)
}
#[must_use]
pub fn right(&self) -> TextWithCursors<'static> {
pub fn right(&self) -> TextWithCursors {
ExampleDocument::string_to_text_with_cursors(&self.right)
}
@ -37,7 +37,7 @@ impl ExampleDocument {
///
/// If the result string does not match the expected string, the program
/// will panic.
pub fn assert_eq(&self, result: &TextWithCursors<'static>) {
pub fn assert_eq(&self, result: &TextWithCursors) {
let result_str = ExampleDocument::text_with_cursors_to_string(result);
assert_eq!(
self.expected, result_str,
@ -53,16 +53,16 @@ impl ExampleDocument {
/// If the result string does not match the expected string, the program
/// will panic.
pub fn assert_eq_without_cursors(&self, result: &str) {
let expected = ExampleDocument::string_to_text_with_cursors(&self.expected).text;
let expected = ExampleDocument::string_to_text_with_cursors(&self.expected).text();
assert_eq!(
expected, result,
"Left (expected) isn't equal to right (actual), Actual: ```\n{result}```",
);
}
fn text_with_cursors_to_string(text: &TextWithCursors<'_>) -> String {
let mut result = text.text.clone().into_owned();
for (i, cursor) in text.cursors.iter().enumerate() {
fn text_with_cursors_to_string(document: &TextWithCursors) -> String {
let mut result = document.text().clone();
for (i, cursor) in document.cursors().iter().enumerate() {
assert!(
cursor.char_index <= result.len(), // equals in case of insert at the end
"Cursor index out of bounds: {} > {} when testing for '{result}'",
@ -82,7 +82,7 @@ impl ExampleDocument {
result
}
fn string_to_text_with_cursors(text: &str) -> TextWithCursors<'static> {
fn string_to_text_with_cursors(text: &str) -> TextWithCursors {
let cursors = Self::parse_cursors(text);
let text = text.replace('|', "");
TextWithCursors::new_owned(text, cursors)

View file

@ -11,8 +11,8 @@ fn test_document_one_way_without_cursors() {
for doc in &get_all_documents() {
doc.assert_eq_without_cursors(&reconcile(
&doc.parent(),
&doc.left().text,
&doc.right().text,
&doc.left().text(),
&doc.right().text(),
));
}
}
@ -22,8 +22,8 @@ fn test_document_one_way_with_cursors() {
for doc in &get_all_documents() {
doc.assert_eq(&reconcile_with_cursors(
&doc.parent(),
doc.left(),
doc.right(),
&doc.left(),
&doc.right(),
));
}
}
@ -33,8 +33,8 @@ fn test_document_inverse_way_without_cursors() {
for doc in &get_all_documents() {
doc.assert_eq_without_cursors(&reconcile(
&doc.parent(),
&doc.right().text,
&doc.left().text,
&doc.right().text(),
&doc.left().text(),
));
}
}
@ -44,8 +44,8 @@ fn test_document_inverse_way_with_cursors() {
for doc in &get_all_documents() {
doc.assert_eq(&reconcile_with_cursors(
&doc.parent(),
doc.right(),
doc.left(),
&doc.right(),
&doc.left(),
));
}
}

View file

@ -1,9 +1,6 @@
#![cfg(feature = "wasm")]
use reconcile::wasm::{
lib::{is_binary, merge, merge_text, merge_text_with_cursors},
types::{JsCursorPosition, JsTextWithCursors},
};
use reconcile::{CursorPosition, TextWithCursors, wasm::*};
use wasm_bindgen_test::*;
#[wasm_bindgen_test(unsupported = test)]
@ -31,18 +28,18 @@ fn test_merge_text() {
fn test_merge_text_with_cursors() {
let result = merge_text_with_cursors(
"hi",
JsTextWithCursors::new("hi world".to_owned(), vec![]),
JsTextWithCursors::new(
&TextWithCursors::new("hi world".to_owned(), vec![]),
&TextWithCursors::new(
"hi".to_owned(),
vec![JsCursorPosition::new(0, 1), JsCursorPosition::new(1, 2)],
vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)],
),
);
assert_eq!(
result,
JsTextWithCursors::new(
TextWithCursors::new(
"hi world".to_owned(),
vec![JsCursorPosition::new(0, 1), JsCursorPosition::new(1, 2)]
vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)]
),
);
}