Unify WASM and Rust API types

This commit is contained in:
Andras Schmelczer 2025-06-29 17:42:37 +01:00
parent b18a692d46
commit 5378ffb547
16 changed files with 252 additions and 301 deletions

View file

@ -1,57 +0,0 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// Position of an identifiable cursor in a text document, expressed as a
/// (UTF-8) character index.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
    /// Identifier of the cursor.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

impl CursorPosition {
    /// Returns a cursor that keeps this cursor's `id` but is placed at `index`.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        let id = self.id;
        Self {
            id,
            char_index: index,
        }
    }
}
/// A text together with the cursors placed in it.
///
/// The text is stored as a [`Cow`], so it is either borrowed for `'a` or
/// owned by the value.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
    pub text: Cow<'a, str>,
    pub cursors: Vec<CursorPosition>,
}

impl<'a> TextWithCursors<'a> {
    /// Wraps a borrowed `text` and its `cursors` without copying the text.
    #[must_use]
    pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Borrowed(text);
        Self { text, cursors }
    }

    /// Wraps an owned `text` and its `cursors`, taking ownership of the string.
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Owned(text);
        Self { text, cursors }
    }
}
/// Builds a cursor-less [`TextWithCursors`] that borrows `text`.
impl<'a> From<&'a str> for TextWithCursors<'a> {
    fn from(text: &'a str) -> Self {
        // A plain string slice carries no cursor information.
        Self::new(text, Vec::new())
    }
}

View file

@ -3,10 +3,11 @@ use std::fmt::Debug;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors};
use crate::{
operation_transformation::utils::{
cook_operations::cook_operations, elongate_operations::elongate_operations,
CursorPosition, TextWithCursors,
operation_transformation::{
Operation,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
},
raw_operation::RawOperation,
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
@ -45,7 +46,7 @@ impl<'a> EditedText<'a, String> {
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>, side: Side) -> Self {
pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer, side)
}
}
@ -61,19 +62,19 @@ where
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: TextWithCursors<'a>,
updated: &TextWithCursors,
tokenizer: &Tokenizer<T>,
side: Side,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
let updated_tokens = (tokenizer)(&updated.text());
let diff: Vec<RawOperation<T>> = RawOperation::vec_from(&original_tokens, &updated_tokens);
Self::new(
original,
cook_operations(elongate_operations(diff), side).collect(),
updated.cursors,
updated.cursors(),
)
}
@ -239,11 +240,11 @@ where
match operation {
Operation::Equal { .. } => {
history.push(TextWithHistory::new(History::Unchanged, builder.take()))
history.push(TextWithHistory::new(History::Unchanged, builder.take()));
}
Operation::Insert { side, .. } => match side {
Side::Left => {
history.push(TextWithHistory::new(History::AddedFromLeft, builder.take()))
history.push(TextWithHistory::new(History::AddedFromLeft, builder.take()));
}
Side::Right => history.push(TextWithHistory::new(
History::AddedFromRight,
@ -259,10 +260,10 @@ where
let deleted = self.text[*order..*order + *deleted_character_count].to_string();
match side {
Side::Left => {
history.push(TextWithHistory::new(History::RemovedFromLeft, deleted))
history.push(TextWithHistory::new(History::RemovedFromLeft, deleted));
}
Side::Right => {
history.push(TextWithHistory::new(History::RemovedFromRight, deleted))
history.push(TextWithHistory::new(History::RemovedFromRight, deleted));
}
}
}
@ -285,7 +286,7 @@ mod tests {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right.into(), Side::Right);
let operations = EditedText::from_strings(left, &right.into(), Side::Right);
insta::assert_debug_snapshot!(operations);
@ -297,7 +298,7 @@ mod tests {
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text.into(), Side::Right);
let operations = EditedText::from_strings(text, &text.into(), Side::Right);
assert_debug_snapshot!(operations);
@ -312,8 +313,8 @@ mod tests {
let right = "Hello world! How are you?";
let expected = "Hello world! How are you? I'm Andras.";
let operations_1 = EditedText::from_strings(original, left.into(), Side::Left);
let operations_2 = EditedText::from_strings(original, right.into(), Side::Right);
let operations_1 = EditedText::from_strings(original, &left.into(), Side::Left);
let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right);
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);

View file

@ -1,6 +1,6 @@
use std::fmt::Debug;
use crate::{operation_transformation::Operation, raw_operation::RawOperation, utils::side::Side};
use crate::{operation_transformation::Operation, raw_operation::RawOperation, types::side::Side};
/// Turn raw operations into ordered operations while keeping track of the
/// original token's indexes.

View file

@ -4,7 +4,7 @@ use crate::{tokenizer::token::Token, utils::myers_diff::myers_diff};
/// Text editing operation containing the to-be-changed `Tokens`-s.
///
/// RawOperations can be joined together when the underlying tokens
/// `RawOperations` can be joined together when the underlying tokens
/// allow for joining subsequent operations.
#[derive(Debug, Clone, PartialEq)]
pub enum RawOperation<T>

View file

@ -1,3 +1,5 @@
pub mod cursor_position;
pub mod history;
pub mod side;
pub mod text_with_cursors;
pub mod text_with_history;

View file

@ -0,0 +1,36 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
/// Position of an identifiable cursor in a text document, expressed as a
/// (UTF-8) character index.
// NOTE(review): with the `wasm` feature enabled, both the public `id` field
// and the `id()` method may end up exported under the same JS name — confirm
// that the generated bindings are unambiguous.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CursorPosition {
    /// Identifier of the cursor.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl CursorPosition {
    /// Creates a cursor identified by `id` located at `char_index`.
    #[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
    #[must_use]
    pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }

    /// Returns a cursor that keeps this cursor's `id` but is placed at `index`.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self { Self::new(self.id, index) }

    /// Returns the identifier of the cursor.
    #[must_use]
    pub fn id(&self) -> usize { self.id }

    /// Returns the character index of the cursor (`characterPosition` in JS).
    #[cfg_attr(feature = "wasm", wasm_bindgen(js_name = characterPosition))]
    #[must_use]
    pub fn char_index(&self) -> usize { self.char_index }
}

View file

@ -0,0 +1,47 @@
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
use crate::types::cursor_position::CursorPosition;
/// A text together with the cursors placed in it.
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors {
    text: String, // wasm-pack doesn't support generics so we can't use Cow here
    cursors: Vec<CursorPosition>,
}

#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl TextWithCursors {
    /// Creates a text with the given cursors.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if any cursor's
    /// `char_index` is greater than the number of characters in `text`.
    #[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
    #[must_use]
    pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
        // Same semantics as `debug_assert!`, but the character count also
        // lives behind the guard so release builds never compute it.
        if cfg!(debug_assertions) {
            let length = text.chars().count();
            for cursor in &cursors {
                assert!(
                    cursor.char_index <= length,
                    // cursor.char_index == length means that the cursor is at the end
                    "Cursor positions must be contained within the text or just after the end"
                );
            }
        }

        Self { text, cursors }
    }

    /// Returns a copy of the text.
    #[must_use]
    pub fn text(&self) -> String { self.text.clone() }

    /// Returns a copy of the cursors.
    #[must_use]
    pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }

    /// Creates a text with the given cursors without the debug-only bounds
    /// check performed by [`TextWithCursors::new`].
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self { Self { text, cursors } }
}
/// Builds a cursor-less [`TextWithCursors`] by copying the string slice.
impl From<&str> for TextWithCursors {
    fn from(text: &str) -> Self {
        Self {
            text: text.to_owned(),
            cursors: Vec::new(),
        }
    }
}

View file

@ -5,7 +5,7 @@ use wasm_bindgen::prelude::*;
use crate::types::history::History;
/// Wrapper type to expose `(History, String)` to JS.
/// Wrapper type for `(History, String)`
#[cfg_attr(feature = "wasm", wasm_bindgen)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
@ -16,6 +16,7 @@ pub struct TextWithHistory {
#[cfg_attr(feature = "wasm", wasm_bindgen)]
impl TextWithHistory {
#[must_use]
pub fn new(history: History, text: String) -> Self { TextWithHistory { history, text } }
#[must_use]

View file

@ -1,5 +1,6 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod is_binary;
pub mod myers_diff;
pub mod string_builder;

24
src/utils/is_binary.rs Normal file
View file

@ -0,0 +1,24 @@
/// Heuristically decide whether `data` is the content of a binary file rather
/// than of a text file.
///
/// Returns `true` when `data` contains a NUL byte or is not valid UTF-8.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    // Even though the NUL character is valid in UTF-8, it's highly suspicious in
    // human-readable text.
    let has_nul_byte = data.contains(&0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_binary() {
        assert!(is_binary(&[0, 159, 146, 150]));
        assert!(is_binary(&[0, 12]));
        assert!(!is_binary(b"hello"));
    }

    /// Invalid UTF-8 must be detected even when no NUL byte is present, so
    /// that the UTF-8 validation branch is exercised on its own.
    #[test]
    fn test_invalid_utf8_without_nul_is_binary() {
        assert!(is_binary(&[159, 146, 150]));
        assert!(is_binary(&[0xff, 0xfe]));
    }

    /// A NUL byte marks the data as binary even inside otherwise valid UTF-8.
    #[test]
    fn test_nul_in_valid_utf8_is_binary() {
        assert!(is_binary(b"hel\0lo"));
    }

    /// Empty input and non-ASCII text are still treated as text.
    #[test]
    fn test_text_is_not_binary() {
        assert!(!is_binary(&[]));
        assert!(!is_binary("héllo wörld".as_bytes()));
    }
}

View file

@ -1,2 +1,100 @@
pub mod lib;
pub mod types;
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `errors`: Contains error types used in this crate.
use core::str;
use wasm_bindgen::prelude::*;
use crate::{
TextWithCursors, TextWithHistory, reconcile, reconcile_with_cursors, reconcile_with_history,
};
/// Merge two documents with a common parent.
///
/// Text documents are merged with `reconcile`. If any of the three inputs
/// (`parent` included) is classified as binary by `is_binary`, no merge is
/// attempted and a copy of `right` is returned as-is.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document, or a copy of `right` if any input is binary.
///
/// # Panics
///
/// Only if an input that `is_binary` classified as text is not valid UTF-8.
/// This is not expected to happen, because `is_binary` is expected to report
/// invalid UTF-8 as binary.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
    set_panic_hook();

    if is_binary(parent) || is_binary(left) || is_binary(right) {
        // Binary content is not merged as text; the right side wins.
        return right.to_vec();
    }

    let parent =
        str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary");
    let left = str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary");
    let right = str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary");

    reconcile(parent, left, right).into_bytes()
}
/// WASM wrapper around `reconcile` for merging text.
///
/// Exported to JavaScript as `mergeText`. Calls `set_panic_hook` first and
/// then returns the result of `reconcile(parent, left, right)` unchanged.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
reconcile(parent, left, right)
}
/// WASM wrapper around `reconcile_with_history` for merging text.
///
/// Exported to JavaScript as `mergeTextWithHistory`. Calls `set_panic_hook`
/// first and then collects the result of
/// `reconcile_with_history(parent, left, right)` into a `Vec<TextWithHistory>`.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<TextWithHistory> {
set_panic_hook();
reconcile_with_history(parent, left, right)
// NOTE(review): if `reconcile_with_history` already returns a
// `Vec<TextWithHistory>`, this `into_iter().collect()` round-trip is
// redundant — confirm against its signature.
.into_iter()
.collect()
}
/// WASM wrapper around `reconcile_with_cursors` for merging text.
///
/// Exported to JavaScript as `mergeTextWithCursors`. Calls `set_panic_hook`
/// first and then returns the result of
/// `reconcile_with_cursors(parent, left, right)` unchanged. `left` and `right`
/// are only borrowed.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: &TextWithCursors,
right: &TextWithCursors,
) -> TextWithCursors {
set_panic_hook();
reconcile_with_cursors(parent, left, right)
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
///
/// Exported to JavaScript as `isBinary`. Calls `set_panic_hook` first and then
/// delegates the decision to `crate::is_binary`.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
set_panic_hook();
crate::is_binary(data)
}
/// Installs the `console_error_panic_hook` panic hook via `set_once` when the
/// `console_error_panic_hook` feature is enabled; otherwise does nothing.
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -1,106 +0,0 @@
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `errors`: Contains error types used in this crate.
use core::str;
use wasm_bindgen::prelude::*;
use crate::wasm::types::{JsTextWithCursors, JsTextWithHistory};
/// Merge two documents with a common parent.
///
/// Text documents are merged with `crate::reconcile`. If any of the three
/// inputs (`parent` included) is classified as binary by `is_binary`, no merge
/// is attempted and a copy of `right` is returned as-is.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document, or a copy of `right` if any input is binary.
///
/// # Panics
///
/// Only if an input that `is_binary` classified as text is not valid UTF-8.
/// This cannot happen as long as `is_binary` reports invalid UTF-8 as binary.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
    set_panic_hook();

    if is_binary(parent) || is_binary(left) || is_binary(right) {
        // Binary content is not merged as text; the right side wins.
        return right.to_vec();
    }

    let parent =
        str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary");
    let left = str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary");
    let right = str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary");

    crate::reconcile(parent, left, right).into_bytes()
}
/// WASM wrapper around `crate::reconcile` for merging text.
///
/// Exported to JavaScript as `mergeText`. Calls `set_panic_hook` first and
/// then returns the result of `crate::reconcile(parent, left, right)` unchanged.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
crate::reconcile(parent, left, right)
}
/// WASM wrapper around `crate::reconcile_with_history` for merging text.
///
/// Exported to JavaScript as `mergeTextWithHistory`. Calls `set_panic_hook`
/// first, then converts every item produced by
/// `crate::reconcile_with_history(parent, left, right)` into a
/// `JsTextWithHistory`.
#[wasm_bindgen(js_name = mergeTextWithHistory)]
#[must_use]
pub fn merge_text_with_history(parent: &str, left: &str, right: &str) -> Vec<JsTextWithHistory> {
set_panic_hook();
crate::reconcile_with_history(parent, left, right)
.into_iter()
// Convert each entry into its JS-facing `JsTextWithHistory` wrapper.
.map(Into::into)
.collect()
}
/// WASM wrapper around `crate::reconcile_with_cursors` for merging text.
///
/// Exported to JavaScript as `mergeTextWithCursors`. Calls `set_panic_hook`
/// first, converts `left` and `right` with `into()` before handing them to
/// `crate::reconcile_with_cursors`, and converts the result back into a
/// `JsTextWithCursors`.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: JsTextWithCursors,
right: JsTextWithCursors,
) -> JsTextWithCursors {
set_panic_hook();
crate::reconcile_with_cursors(parent, left.into(), right.into()).into()
}
/// Heuristically decide whether `data` is the content of a binary file rather
/// than of a text file.
///
/// Exported to JavaScript as `isBinary`. Returns `true` when `data` contains
/// a NUL byte or is not valid UTF-8.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    set_panic_hook();

    // Even though the NUL character is valid in UTF-8, it's highly suspicious in
    // human-readable text.
    let has_nul_byte = data.contains(&0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
/// Installs the `console_error_panic_hook` panic hook via `set_once` when the
/// `console_error_panic_hook` feature is enabled; otherwise does nothing.
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -1,93 +0,0 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "wasm")]
use wasm_bindgen::prelude::*;
use crate::History;
/// Wrapper type to expose `TextWithCursors` to JS.
///
/// Holds an owned `String` and a list of `JsCursorPosition`s; both fields are
/// private and are read through the accessor methods below.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct JsTextWithCursors {
text: String,
cursors: Vec<JsCursorPosition>,
}
#[wasm_bindgen]
impl JsTextWithCursors {
/// Creates a wrapper from `text` and `cursors`; marked as the JS constructor.
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(text: String, cursors: Vec<JsCursorPosition>) -> Self { Self { text, cursors } }
/// Returns a clone of the wrapped text.
#[must_use]
pub fn text(&self) -> String { self.text.clone() }
/// Returns a clone of the wrapped cursors.
#[must_use]
pub fn cursors(&self) -> Vec<JsCursorPosition> { self.cursors.clone() }
}
impl From<JsTextWithCursors> for crate::TextWithCursors<'_> {
fn from(owned: JsTextWithCursors) -> Self {
crate::TextWithCursors::new_owned(
owned.text.to_string(),
owned
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
)
}
}
impl From<crate::TextWithCursors<'_>> for JsTextWithCursors {
fn from(text_with_cursors: crate::TextWithCursors<'_>) -> Self {
JsTextWithCursors {
text: text_with_cursors.text.into_owned(),
cursors: text_with_cursors
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
}
}
}
/// Wrapper type to expose `CursorPosition` to JS.
///
/// Both fields are private and are read through the accessor methods below.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct JsCursorPosition {
id: usize,
char_index: usize,
}
#[wasm_bindgen]
impl JsCursorPosition {
/// Creates a cursor from `id` and `char_index`; marked as the JS constructor.
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }
/// Returns the cursor's `id`.
#[must_use]
pub fn id(&self) -> usize { self.id }
/// Returns the cursor's `char_index`; named `characterPosition` in JS.
#[wasm_bindgen(js_name = characterPosition)]
#[must_use]
pub fn char_index(&self) -> usize { self.char_index }
}
impl From<JsCursorPosition> for crate::CursorPosition {
fn from(owned: JsCursorPosition) -> Self {
crate::CursorPosition {
id: owned.id,
char_index: owned.char_index,
}
}
}
impl From<crate::CursorPosition> for JsCursorPosition {
fn from(cursor: crate::CursorPosition) -> Self {
JsCursorPosition {
id: cursor.id,
char_index: cursor.char_index,
}
}
}