Add diff applying error & improve CI (#32)

* Use stable rust

* Add From impls

* Revert to nightly

* Improve dev env & CI setup

* Update lock

* Add thiserror

* Add diff error

* Fix tests

* Lint

* Rename NumberOrString

* Format

* Fix lint script
This commit is contained in:
Andras Schmelczer 2025-12-06 21:54:08 +00:00 committed by GitHub
parent e03b9147df
commit 88d48afce3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 195 additions and 1192 deletions

View file

@ -187,7 +187,7 @@
//! original,
//! deserialized,
//! &*BuiltinTokenizer::Word
//! );
//! ).unwrap();
//! assert_eq!(
//! reconstructed.apply().text(),
//! "Merging text is easy with reconcile!"
@ -215,11 +215,11 @@ mod tokenizer;
mod types;
mod utils;
pub use operation_transformation::{EditedText, reconcile};
pub use operation_transformation::{DiffError, EditedText, reconcile};
pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
pub use types::{
cursor_position::CursorPosition, history::History, number_or_string::NumberOrString,
side::Side, span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
cursor_position::CursorPosition, history::History, number_or_text::NumberOrText, side::Side,
span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
};
#[cfg(feature = "wasm")]

View file

@ -1,8 +1,10 @@
mod diff_error;
mod edited_text;
mod operation;
mod utils;
use std::fmt::Debug;
pub use diff_error::DiffError;
pub use edited_text::EditedText;
pub use operation::Operation;

View file

@ -0,0 +1,19 @@
use thiserror::Error;
/// Error type for invalid diff operations
#[derive(Error, Debug, Clone, PartialEq)]
pub enum DiffError {
/// The diff references a range that exceeds the original text length
#[error(
"Invalid diff: attempting to access {requested} characters starting at position \
{position}, but original text only has {available} characters remaining"
)]
LengthExceedsOriginal {
/// The position where the operation starts
position: usize,
/// The number of characters requested
requested: usize,
/// The number of characters available from the position
available: usize,
},
}

View file

@ -6,13 +6,13 @@ use serde::{Deserialize, Serialize};
use crate::{
BuiltinTokenizer, CursorPosition, TextWithCursors,
operation_transformation::{
Operation,
DiffError, Operation,
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
},
raw_operation::RawOperation,
tokenizer::Tokenizer,
types::{
history::History, number_or_string::NumberOrString, side::Side,
history::History, number_or_text::NumberOrText, side::Side,
span_with_history::SpanWithHistory,
},
utils::string_builder::StringBuilder,
@ -366,8 +366,8 @@ where
///
/// Panics if there's an integer overflow in i64.
#[must_use]
pub fn to_diff(&self) -> Vec<NumberOrString> {
let mut result: Vec<NumberOrString> = Vec::with_capacity(self.operations.len());
pub fn to_diff(&self) -> Vec<NumberOrText> {
let mut result: Vec<NumberOrText> = Vec::with_capacity(self.operations.len());
let mut previous_equal: Option<usize> = None;
for operation in &self.operations {
@ -382,7 +382,7 @@ where
Operation::Insert { text, .. } => {
if let Some(prev_length) = previous_equal {
result.push(NumberOrString::Number(
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
previous_equal = None;
@ -392,7 +392,7 @@ where
.iter()
.map(super::super::tokenizer::token::Token::original)
.collect();
result.push(NumberOrString::Text(text));
result.push(NumberOrText::Text(text));
}
Operation::Delete {
@ -400,7 +400,7 @@ where
..
} => {
if let Some(prev_length) = previous_equal {
result.push(NumberOrString::Number(
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
previous_equal = None;
@ -408,13 +408,13 @@ where
let count = i64::try_from(*deleted_character_count)
.expect("deleted_character_count must fit in i64");
result.push(NumberOrString::Number(-count));
result.push(NumberOrText::Number(-count));
}
}
}
if let Some(prev_length) = previous_equal {
result.push(NumberOrString::Number(
result.push(NumberOrText::Number(
i64::try_from(prev_length).expect("prev_length must fit in i64"),
));
}
@ -424,23 +424,38 @@ where
/// Deserialize an `EditedText` from a change list and the original text.
///
/// # Errors
///
/// Returns `DiffError::LengthExceedsOriginal` if the diff references a
/// range that exceeds the original text length.
///
/// # Panics
///
/// Panics if there's an integer overflow in i64.
#[must_use]
pub fn from_diff(
original_text: &'a str,
diff: Vec<NumberOrString>,
diff: Vec<NumberOrText>,
tokenizer: &Tokenizer<T>,
) -> EditedText<'a, T> {
) -> Result<EditedText<'a, T>, DiffError> {
let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
let mut order = 0;
for item in diff {
match item {
NumberOrString::Number(length) => {
NumberOrText::Number(length) => {
if length >= 0 {
let length = usize::try_from(length).expect("length must fit in usize");
// Validate that the range doesn't exceed the original text
let text_length = original_text.chars().count();
if order + length > text_length {
return Err(DiffError::LengthExceedsOriginal {
position: order,
requested: length,
available: text_length.saturating_sub(order),
});
}
let original_characters: String =
original_text.chars().skip(order).take(length).collect();
@ -453,11 +468,22 @@ where
} else {
let length =
usize::try_from(-length).expect("negative length must fit in usize");
// Validate that the delete range doesn't exceed the original text
let text_length = original_text.chars().count();
if order + length > text_length {
return Err(DiffError::LengthExceedsOriginal {
position: order,
requested: length,
available: text_length.saturating_sub(order),
});
}
operations.push(Operation::create_delete(order, length));
order += length;
}
}
NumberOrString::Text(text) => {
NumberOrText::Text(text) => {
let tokens = tokenizer(&text);
operations.push(Operation::create_insert(order, tokens));
}
@ -465,12 +491,12 @@ where
}
let operation_count = operations.len();
EditedText::new(
Ok(EditedText::new(
original_text,
operations,
vec![Side::Left; operation_count],
vec![],
)
))
}
}
@ -520,6 +546,49 @@ mod tests {
assert_eq!(operations.apply().text(), expected);
}
/// A retained span longer than the original text must be rejected, and the
/// error must describe the offending range.
#[test]
fn test_from_diff_length_exceeds_original() {
    // The first entry asks for 10 characters of a 5-character original.
    let diff = vec![10.into(), " world".into()];
    let outcome = EditedText::from_diff("hello", diff, &*BuiltinTokenizer::Word);

    assert!(outcome.is_err());

    let Err(DiffError::LengthExceedsOriginal {
        position,
        requested,
        available,
    }) = outcome
    else {
        panic!("Expected LengthExceedsOriginal error");
    };

    assert_eq!(position, 0);
    assert_eq!(requested, 10);
    assert_eq!(available, 5);
}
/// A diff that retains exactly the whole original and then inserts text must
/// deserialize successfully and apply to the expected result.
#[test]
fn test_from_diff_valid() {
    // Retain all 5 characters of "hello", then append " world".
    let diff = vec![5.into(), " world".into()];
    let reconstructed =
        EditedText::from_diff("hello", diff, &*BuiltinTokenizer::Word).unwrap();

    assert_eq!(reconstructed.apply().text(), "hello world");
}
#[cfg(feature = "serde")]
#[test]
fn test_changes_deserialisation() {
@ -542,7 +611,7 @@ mod tests {
let changes = edited_text.to_diff();
let deserialized_edited_text =
EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word);
EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();
assert_eq!(deserialized_edited_text.apply().text(), updated);
}

View file

@ -1,6 +1,6 @@
pub mod cursor_position;
pub mod history;
pub mod number_or_string;
pub mod number_or_text;
pub mod side;
pub mod span_with_history;
pub mod text_with_cursors;

View file

@ -1,4 +1,4 @@
use std::fmt::Debug;
use std::{borrow::Cow, fmt::Debug};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
@ -12,18 +12,18 @@ const INTEGRAL_LIMIT: f64 = (1u64 << 53) as f64;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
#[derive(Debug, Clone, PartialEq)]
pub enum NumberOrString {
pub enum NumberOrText {
Number(i64),
Text(String),
}
#[cfg(feature = "wasm")]
impl TryFrom<JsValue> for NumberOrString {
impl TryFrom<JsValue> for NumberOrText {
type Error = DeserialisationError;
fn try_from(value: JsValue) -> Result<Self, Self::Error> {
if let Ok(num) = value.clone().try_into() {
return Ok(NumberOrString::Number(num));
return Ok(NumberOrText::Number(num));
}
if let Some(num) = value.clone().as_f64() {
@ -34,11 +34,11 @@ impl TryFrom<JsValue> for NumberOrString {
}
#[allow(clippy::cast_possible_truncation)]
return Ok(NumberOrString::Number(num.round() as i64));
return Ok(NumberOrText::Number(num.round() as i64));
}
if let Ok(text) = value.try_into() {
return Ok(NumberOrString::Text(text));
return Ok(NumberOrText::Text(text));
}
Err(DeserialisationError::new(
@ -48,15 +48,31 @@ impl TryFrom<JsValue> for NumberOrString {
}
#[cfg(feature = "wasm")]
impl From<NumberOrString> for JsValue {
fn from(value: NumberOrString) -> Self {
impl From<NumberOrText> for JsValue {
fn from(value: NumberOrText) -> Self {
match value {
NumberOrString::Number(num) => JsValue::from(num),
NumberOrString::Text(text) => JsValue::from(text),
NumberOrText::Number(num) => JsValue::from(num),
NumberOrText::Text(text) => JsValue::from(text),
}
}
}
impl From<i64> for NumberOrText {
fn from(value: i64) -> Self { NumberOrText::Number(value) }
}
impl From<String> for NumberOrText {
fn from(value: String) -> Self { NumberOrText::Text(value) }
}
impl From<&str> for NumberOrText {
fn from(value: &str) -> Self { NumberOrText::Text(value.to_owned()) }
}
impl<'a> From<Cow<'a, str>> for NumberOrText {
fn from(value: Cow<'a, str>) -> Self { NumberOrText::Text(value.into_owned()) }
}
/// Error type for deserialisation failures
#[cfg(feature = "wasm")]
#[derive(Debug, Clone)]

View file

@ -105,16 +105,17 @@ pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer
pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {
set_panic_hook();
EditedText::from_diff(
match EditedText::from_diff(
parent,
diff.into_iter()
.map(std::convert::TryInto::try_into)
.collect::<Result<_, _>>()
.expect("Invalid diff format"),
&*tokenizer,
)
.apply()
.text()
) {
Ok(edited_text) => edited_text.apply().text(),
Err(e) => panic!("{}", e),
}
}
fn set_panic_hook() {