diff --git a/src/lib.rs b/src/lib.rs index 407a5df..f200340 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,11 +100,11 @@ //! let parent = "Hello world"; //! let left = TextWithCursors::new( //! "Hello beautiful world".to_string(), -//! vec![CursorPosition { id: 1, char_index: 6 }] // After "Hello " +//! vec![CursorPosition::new(1, 6)] // After "Hello " //! ); //! let right = TextWithCursors::new( //! "Hi world".to_string(), -//! vec![CursorPosition { id: 2, char_index: 0 }] // At the beginning +//! vec![CursorPosition::new(2, 0)] // At the beginning //! ); //! //! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word); @@ -173,7 +173,7 @@ //! &changes.into() //! ); //! -//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap(); +//! let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap(); //! assert_eq!( //! serialized, //! concat!( diff --git a/src/operation_transformation.rs b/src/operation_transformation.rs index 36803e9..bbaf73e 100644 --- a/src/operation_transformation.rs +++ b/src/operation_transformation.rs @@ -156,7 +156,7 @@ mod test { .unwrap() .chars() .skip(range.start) - .take(range.end) + .take(range.len()) .collect::() }) .collect::>(); diff --git a/src/operation_transformation/diff_error.rs b/src/operation_transformation/diff_error.rs index d10065f..22e717f 100644 --- a/src/operation_transformation/diff_error.rs +++ b/src/operation_transformation/diff_error.rs @@ -16,4 +16,11 @@ pub enum DiffError { /// The number of characters available from the position available: usize, }, + + /// A character count was too large to represent as i64 + #[error("Integer overflow: value {value} cannot be represented as i64")] + IntegerOverflow { + /// The value that caused the overflow + value: usize, + }, } diff --git a/src/raw_operation.rs b/src/raw_operation.rs index 1572e88..bde18c9 100644 --- a/src/raw_operation.rs +++ b/src/raw_operation.rs @@ -22,7 +22,7 @@ where { pub fn vec_from(left: &[Token], right: &[Token]) -> Vec { myers_diff(left, right) } - pub fn tokens(&self) -> &Vec> { + pub fn tokens(&self) -> &[Token] { match self { RawOperation::Insert(tokens) | RawOperation::Delete(tokens) @@ -34,7 +34,9 @@ where self.tokens().iter().map(Token::get_original_length).sum() } - pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() } + pub fn get_original_text(&self) -> String { + self.tokens().iter().map(Token::original).collect() + } pub fn is_left_joinable(&self) -> bool { let first_token = self.tokens().first(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 18b3e8a..4507c9a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,6 @@ mod character_tokenizer; mod line_tokenizer; +mod markdown_tokenizer; mod word_tokenizer; use std::ops::Deref; @@ -22,6 +23,7 @@ pub type Tokenizer = dyn Fn(&str) -> Vec>; pub enum BuiltinTokenizer { Character = "Character", Line = "Line", + Markdown = "Markdown", Word = "Word", } @@ -31,6 +33,7 @@ pub enum BuiltinTokenizer { pub enum BuiltinTokenizer { Character, Line, + Markdown, Word, } @@ -41,6 +44,7 @@ impl Deref for BuiltinTokenizer { match self { BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer, BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer, + BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer, BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer, #[cfg(feature = "wasm")] BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"), diff --git a/src/types/cursor_position.rs b/src/types/cursor_position.rs index 0a817a5..d69a1b2 100644 --- a/src/types/cursor_position.rs +++ b/src/types/cursor_position.rs @@ -10,8 +10,8 @@ use wasm_bindgen::prelude::*; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq, Default)] pub struct CursorPosition { - pub id: usize, - pub char_index: usize, + pub(crate) id: usize, + pub(crate) char_index: usize, } #[cfg_attr(feature = "wasm", wasm_bindgen)] diff --git a/src/types/text_with_cursors.rs b/src/types/text_with_cursors.rs index 8f4af19..f58a34e 100644 --- a/src/types/text_with_cursors.rs +++ b/src/types/text_with_cursors.rs @@ -39,6 +39,11 @@ impl TextWithCursors { pub fn cursors(&self) -> Vec { self.cursors.clone() } } +impl TextWithCursors { + #[must_use] + pub fn text_ref(&self) -> &str { &self.text } +} + impl<'a> From<&'a str> for TextWithCursors { fn from(text: &'a str) -> Self { Self { diff --git a/src/utils/myers_diff.rs b/src/utils/myers_diff.rs index de7c419..dd9f961 100644 --- a/src/utils/myers_diff.rs +++ b/src/utils/myers_diff.rs @@ -90,7 +90,7 @@ impl V { let offset = isize::try_from(max_d).expect("max_d must fit in isize"); Self { offset, - v: vec![0; 2 * max_d], + v: vec![0; 2 * max_d + 1], } } diff --git a/src/utils/string_builder.rs b/src/utils/string_builder.rs index fd89d63..abe372c 100644 --- a/src/utils/string_builder.rs +++ b/src/utils/string_builder.rs @@ -1,10 +1,10 @@ -use std::{fmt, iter::Iterator}; +use std::{fmt, str::Chars}; /// A helper for building a string sequentially from an original string via /// insertions, deletions, and copies. All operations use character counts, /// safe for UTF-8. Methods must be called in-order. pub struct StringBuilder<'a> { - original: Box + 'a>, + original: Chars<'a>, buffer: String, #[cfg(debug_assertions)] @@ -26,7 +26,7 @@ impl fmt::Debug for StringBuilder<'_> { impl StringBuilder<'_> { pub fn new(original: &str) -> StringBuilder<'_> { StringBuilder { - original: Box::new(original.chars()), + original: original.chars(), buffer: String::with_capacity(original.len()), #[cfg(debug_assertions)] diff --git a/src/wasm.rs b/src/wasm.rs index 17dc7d5..c15a97f 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -5,9 +5,6 @@ use wasm_bindgen::prelude::*; use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors}; -#[global_allocator] -static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT; - /// WASM wrapper around `crate::reconcile` for merging text #[wasm_bindgen(js_name = reconcile)] #[must_use] @@ -34,11 +31,11 @@ pub fn reconcile_with_history( set_panic_hook(); let reconciled = crate::reconcile(parent, left, right, &*tokenizer); - let text_with_cursors = reconciled.apply(); + let (text_with_cursors, history) = reconciled.apply_with_all(); TextWithCursorsAndHistory { text_with_cursors, - history: reconciled.apply_with_history(), + history, } } @@ -81,17 +78,23 @@ pub fn generic_reconcile( /// WASM wrapper around getting a compact diff representation of two texts as a /// list of numbers and strings +/// +/// # Errors +/// +/// Returns a JS error if integer overflow occurs during diff computation. #[wasm_bindgen(js_name = diff)] -#[must_use] -pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec { +pub fn diff( + parent: &str, + changed: &TextWithCursors, + tokenizer: BuiltinTokenizer, +) -> Result, JsValue> { set_panic_hook(); let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer); edited_text .to_diff() - .into_iter() - .map(std::convert::Into::into) - .collect() + .map(|diff| diff.into_iter().map(std::convert::Into::into).collect()) + .map_err(|e| JsValue::from_str(&e.to_string())) } /// Inverse of `diff`, applies a compact diff representation to a parent text diff --git a/tests/example_document.rs b/tests/example_document.rs index 2984077..6d8f370 100644 --- a/tests/example_document.rs +++ b/tests/example_document.rs @@ -65,9 +65,9 @@ impl ExampleDocument { let mut result = merged.text(); for (i, cursor) in merged.cursors().iter().enumerate() { assert!( - cursor.char_index <= result.len(), // equals in case of insert at the end + cursor.char_index() <= result.len(), // equals in case of insert at the end "Cursor index out of bounds: {} > {} when testing for '{}.'", - cursor.char_index, + cursor.char_index(), result.len(), result ); @@ -75,7 +75,7 @@ impl ExampleDocument { result.insert( result .char_indices() - .nth(cursor.char_index + i) + .nth(cursor.char_index() + i) .map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert * in byte index */ '|', @@ -94,10 +94,7 @@ impl ExampleDocument { let mut cursors = Vec::new(); for (i, c) in text.chars().enumerate() { if c == '|' { - cursors.push(CursorPosition { - id: 0, - char_index: i - cursors.len(), - }); + cursors.push(CursorPosition::new(0, i - cursors.len())); } } cursors diff --git a/tests/test.rs b/tests/test.rs index 2e54e6c..68d8762 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -49,12 +49,14 @@ fn test_document_one_way_with_serialisation() { &*BuiltinTokenizer::Word, ); - let serialised_left = - serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap()) - .unwrap(); - let serialised_right = - serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap()) - .unwrap(); + let serialised_left = serde_yaml::from_str( + &serde_yaml::to_string(&left_operations.to_diff().unwrap()).unwrap(), + ) + .unwrap(); + let serialised_right = serde_yaml::from_str( + &serde_yaml::to_string(&right_operations.to_diff().unwrap()).unwrap(), + ) + .unwrap(); let restored_left_operations = EditedText::from_diff(&parent, serialised_left, &*BuiltinTokenizer::Word).unwrap(); diff --git a/tests/wasm.rs b/tests/wasm.rs index 304ee6e..82af503 100644 --- a/tests/wasm.rs +++ b/tests/wasm.rs @@ -60,7 +60,7 @@ fn test_diff() { let parent = "hello "; let changed = "world"; - let result = diff(parent, &changed.into(), BuiltinTokenizer::Word); + let result = diff(parent, &changed.into(), BuiltinTokenizer::Word).unwrap(); assert_eq!(result.len(), 2); let first: i64 = result[0].clone().try_into().unwrap();