Clean up API and small fixes

This commit is contained in:
Andras Schmelczer 2026-03-10 21:35:09 +00:00
parent 665cdb2881
commit a80da338e4
13 changed files with 56 additions and 36 deletions

View file

@ -100,11 +100,11 @@
//! let parent = "Hello world";
//! let left = TextWithCursors::new(
//! "Hello beautiful world".to_string(),
//! vec![CursorPosition { id: 1, char_index: 6 }] // After "Hello "
//! vec![CursorPosition::new(1, 6)] // After "Hello "
//! );
//! let right = TextWithCursors::new(
//! "Hi world".to_string(),
//! vec![CursorPosition { id: 2, char_index: 0 }] // At the beginning
//! vec![CursorPosition::new(2, 0)] // At the beginning
//! );
//!
//! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word);
@ -173,7 +173,7 @@
//! &changes.into()
//! );
//!
//! let serialized = serde_yaml::to_string(&result.to_diff()).unwrap();
//! let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
//! assert_eq!(
//! serialized,
//! concat!(

View file

@ -156,7 +156,7 @@ mod test {
.unwrap()
.chars()
.skip(range.start)
.take(range.end)
.take(range.len())
.collect::<String>()
})
.collect::<Vec<_>>();

View file

@ -16,4 +16,11 @@ pub enum DiffError {
/// The number of characters available from the position
available: usize,
},
/// A character count was too large to represent as i64
#[error("Integer overflow: value {value} cannot be represented as i64")]
IntegerOverflow {
/// The value that caused the overflow
value: usize,
},
}

View file

@ -22,7 +22,7 @@ where
{
/// Returns the operations that `myers_diff` produces for the `left` and
/// `right` token slices.
pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> {
    myers_diff(left, right)
}
pub fn tokens(&self) -> &Vec<Token<T>> {
pub fn tokens(&self) -> &[Token<T>] {
match self {
RawOperation::Insert(tokens)
| RawOperation::Delete(tokens)
@ -34,7 +34,9 @@ where
self.tokens().iter().map(Token::get_original_length).sum()
}
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
pub fn get_original_text(&self) -> String {
self.tokens().iter().map(Token::original).collect()
}
pub fn is_left_joinable(&self) -> bool {
let first_token = self.tokens().first();

View file

@ -1,5 +1,6 @@
mod character_tokenizer;
mod line_tokenizer;
mod markdown_tokenizer;
mod word_tokenizer;
use std::ops::Deref;
@ -22,6 +23,7 @@ pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
// NOTE(review): the string discriminants suggest this is the `wasm_bindgen`
// string-enum form of the type; the gating attributes sit outside this
// view, so confirm before relying on that.
pub enum BuiltinTokenizer {
/// Dereferences to `character_tokenizer::character_tokenizer`.
Character = "Character",
/// Dereferences to `line_tokenizer::line_tokenizer`.
Line = "Line",
/// Dereferences to `markdown_tokenizer::markdown_tokenizer`.
Markdown = "Markdown",
/// Dereferences to `word_tokenizer::word_tokenizer`.
Word = "Word",
}
@ -31,6 +33,7 @@ pub enum BuiltinTokenizer {
// NOTE(review): presumably the variant of this enum compiled when the
// `wasm` feature is off; the gating attributes sit outside this view, so
// confirm before relying on that.
pub enum BuiltinTokenizer {
/// Dereferences to `character_tokenizer::character_tokenizer`.
Character,
/// Dereferences to `line_tokenizer::line_tokenizer`.
Line,
/// Dereferences to `markdown_tokenizer::markdown_tokenizer`.
Markdown,
/// Dereferences to `word_tokenizer::word_tokenizer`.
Word,
}
@ -41,6 +44,7 @@ impl Deref for BuiltinTokenizer {
match self {
BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
#[cfg(feature = "wasm")]
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),

View file

@ -10,8 +10,8 @@ use wasm_bindgen::prelude::*;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
pub id: usize,
pub char_index: usize,
pub(crate) id: usize,
pub(crate) char_index: usize,
}
#[cfg_attr(feature = "wasm", wasm_bindgen)]

View file

@ -39,6 +39,11 @@ impl TextWithCursors {
pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
}
impl TextWithCursors {
    /// Borrows the stored text as a string slice without copying it.
    #[must_use]
    pub fn text_ref(&self) -> &str {
        self.text.as_str()
    }
}
impl<'a> From<&'a str> for TextWithCursors {
fn from(text: &'a str) -> Self {
Self {

View file

@ -90,7 +90,7 @@ impl V {
let offset = isize::try_from(max_d).expect("max_d must fit in isize");
Self {
offset,
v: vec![0; 2 * max_d],
v: vec![0; 2 * max_d + 1],
}
}

View file

@ -1,10 +1,10 @@
use std::{fmt, iter::Iterator};
use std::{fmt, str::Chars};
/// A helper for building a string sequentially from an original string via
/// insertions, deletions, and copies. All operations use character counts,
/// safe for UTF-8. Methods must be called in-order.
pub struct StringBuilder<'a> {
original: Box<dyn Iterator<Item = char> + 'a>,
original: Chars<'a>,
buffer: String,
#[cfg(debug_assertions)]
@ -26,7 +26,7 @@ impl fmt::Debug for StringBuilder<'_> {
impl StringBuilder<'_> {
pub fn new(original: &str) -> StringBuilder<'_> {
StringBuilder {
original: Box::new(original.chars()),
original: original.chars(),
buffer: String::with_capacity(original.len()),
#[cfg(debug_assertions)]

View file

@ -5,9 +5,6 @@ use wasm_bindgen::prelude::*;
use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
/// WASM wrapper around `crate::reconcile` for merging text
#[wasm_bindgen(js_name = reconcile)]
#[must_use]
@ -34,11 +31,11 @@ pub fn reconcile_with_history(
set_panic_hook();
let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
let text_with_cursors = reconciled.apply();
let (text_with_cursors, history) = reconciled.apply_with_all();
TextWithCursorsAndHistory {
text_with_cursors,
history: reconciled.apply_with_history(),
history,
}
}
@ -81,17 +78,23 @@ pub fn generic_reconcile(
/// WASM wrapper around getting a compact diff representation of two texts as a
/// list of numbers and strings
///
/// # Errors
///
/// Returns a JS error if integer overflow occurs during diff computation.
#[wasm_bindgen(js_name = diff)]
#[must_use]
pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> {
pub fn diff(
parent: &str,
changed: &TextWithCursors,
tokenizer: BuiltinTokenizer,
) -> Result<Vec<JsValue>, JsValue> {
set_panic_hook();
let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
edited_text
.to_diff()
.into_iter()
.map(std::convert::Into::into)
.collect()
.map(|diff| diff.into_iter().map(std::convert::Into::into).collect())
.map_err(|e| JsValue::from_str(&e.to_string()))
}
/// Inverse of `diff`, applies a compact diff representation to a parent text

View file

@ -65,9 +65,9 @@ impl ExampleDocument {
let mut result = merged.text();
for (i, cursor) in merged.cursors().iter().enumerate() {
assert!(
cursor.char_index <= result.len(), // equals in case of insert at the end
cursor.char_index() <= result.len(), // equals in case of insert at the end
"Cursor index out of bounds: {} > {} when testing for '{}.'",
cursor.char_index,
cursor.char_index(),
result.len(),
result
);
@ -75,7 +75,7 @@ impl ExampleDocument {
result.insert(
result
.char_indices()
.nth(cursor.char_index + i)
.nth(cursor.char_index() + i)
.map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert
* in byte index */
'|',
@ -94,10 +94,7 @@ impl ExampleDocument {
let mut cursors = Vec::new();
for (i, c) in text.chars().enumerate() {
if c == '|' {
cursors.push(CursorPosition {
id: 0,
char_index: i - cursors.len(),
});
cursors.push(CursorPosition::new(0, i - cursors.len()));
}
}
cursors

View file

@ -49,11 +49,13 @@ fn test_document_one_way_with_serialisation() {
&*BuiltinTokenizer::Word,
);
let serialised_left =
serde_yaml::from_str(&serde_yaml::to_string(&left_operations.to_diff()).unwrap())
let serialised_left = serde_yaml::from_str(
&serde_yaml::to_string(&left_operations.to_diff().unwrap()).unwrap(),
)
.unwrap();
let serialised_right =
serde_yaml::from_str(&serde_yaml::to_string(&right_operations.to_diff()).unwrap())
let serialised_right = serde_yaml::from_str(
&serde_yaml::to_string(&right_operations.to_diff().unwrap()).unwrap(),
)
.unwrap();
let restored_left_operations =

View file

@ -60,7 +60,7 @@ fn test_diff() {
let parent = "hello ";
let changed = "world";
let result = diff(parent, &changed.into(), BuiltinTokenizer::Word);
let result = diff(parent, &changed.into(), BuiltinTokenizer::Word).unwrap();
assert_eq!(result.len(), 2);
let first: i64 = result[0].clone().try_into().unwrap();