From 8e1690a0bfdfa7a3547f55dc5be30986f674e33c Mon Sep 17 00:00:00 2001
From: Andras Schmelczer
Date: Sat, 6 Dec 2025 11:48:22 +0000
Subject: [PATCH] Add diff error

`EditedText::from_diff` now returns `Result<_, DiffError>` and rejects
diffs whose retain/delete spans run past the end of the original text.

---
Review note: this patch was reconstructed from a whitespace-collapsed copy.
Context-line indentation and blank lines are inferred from rustfmt
conventions and the hunk line counts, and the `index` blob ids below predate
the review changes. Regenerate with `git format-patch` before applying.

 src/lib.rs                                  |  4 +-
 src/operation_transformation/diff_error.rs  | 23 +++++
 src/operation_transformation/edited_text.rs | 94 +++++++++++++++++++--
 3 files changed, 113 insertions(+), 8 deletions(-)
 create mode 100644 src/operation_transformation/diff_error.rs

diff --git a/src/lib.rs b/src/lib.rs
index 2119bea..0720cf2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -187,7 +187,7 @@
 //!     original,
 //!     deserialized,
 //!     &*BuiltinTokenizer::Word
-//! );
+//! ).unwrap();
 //! assert_eq!(
 //!     reconstructed.apply().text(),
 //!     "Merging text is easy with reconcile!"
@@ -215,7 +215,7 @@ mod tokenizer;
 mod types;
 mod utils;
 
-pub use operation_transformation::{EditedText, reconcile};
+pub use operation_transformation::{DiffError, EditedText, reconcile};
 pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
 pub use types::{
     cursor_position::CursorPosition, history::History, number_or_string::NumberOrString,
diff --git a/src/operation_transformation/diff_error.rs b/src/operation_transformation/diff_error.rs
new file mode 100644
index 0000000..d10065f
--- /dev/null
+++ b/src/operation_transformation/diff_error.rs
@@ -0,0 +1,23 @@
+use thiserror::Error;
+
+/// Errors produced when a serialised diff cannot be applied to the original
+/// text it claims to describe.
+///
+/// All positions and lengths are counted in Unicode scalar values (`char`s),
+/// not bytes.
+#[derive(Error, Debug, Clone, PartialEq, Eq)]
+pub enum DiffError {
+    /// A retain or delete span runs past the end of the original text.
+    #[error(
+        "Invalid diff: attempting to access {requested} characters starting at position \
+         {position}, but original text only has {available} characters remaining"
+    )]
+    LengthExceedsOriginal {
+        /// Offset into the original text at which the span starts.
+        position: usize,
+        /// Number of characters the span asks for.
+        requested: usize,
+        /// Number of characters actually left from `position` to the end.
+        available: usize,
+    },
+}
diff --git a/src/operation_transformation/edited_text.rs b/src/operation_transformation/edited_text.rs
index f27fea4..b47936e 100644
--- a/src/operation_transformation/edited_text.rs
+++ b/src/operation_transformation/edited_text.rs
@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
 use crate::{
     BuiltinTokenizer, CursorPosition, TextWithCursors,
     operation_transformation::{
-        Operation,
+        DiffError, Operation,
         utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
     },
     raw_operation::RawOperation,
@@ -424,15 +424,37 @@ where
 
     /// Deserialize an `EditedText` from a change list and the original text.
     ///
+    /// # Errors
+    ///
+    /// Returns `DiffError::LengthExceedsOriginal` if the diff references a
+    /// range that exceeds the original text length.
+    ///
     /// # Panics
     ///
     /// Panics if there's an integer overflow in i64.
-    #[must_use]
     pub fn from_diff(
         original_text: &'a str,
         diff: Vec<NumberOrString>,
         tokenizer: &Tokenizer<T>,
-    ) -> EditedText<'a, T> {
+    ) -> Result<EditedText<'a, T>, DiffError> {
+        // Counted once up front: `chars().count()` walks the whole string.
+        let text_length = original_text.chars().count();
+
+        // Rejects a span that runs past the end of the original text. The
+        // comparison is phrased so that it cannot overflow `usize`.
+        let ensure_in_bounds = |position: usize, requested: usize| {
+            let available = text_length.saturating_sub(position);
+            if position > text_length || requested > available {
+                Err(DiffError::LengthExceedsOriginal {
+                    position,
+                    requested,
+                    available,
+                })
+            } else {
+                Ok(())
+            }
+        };
+
         let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
         let mut order = 0;
 
@@ -441,6 +463,8 @@ where
                 NumberOrString::Number(length) => {
                     if length >= 0 {
                         let length = usize::try_from(length).expect("length must fit in usize");
+                        ensure_in_bounds(order, length)?;
+
                         let original_characters: String =
                             original_text.chars().skip(order).take(length).collect();
 
@@ -453,6 +477,8 @@ where
                     } else {
                         let length =
                             usize::try_from(-length).expect("negative length must fit in usize");
+                        ensure_in_bounds(order, length)?;
+
                         operations.push(Operation::create_delete(order, length));
                         order += length;
                     }
@@ -465,12 +491,12 @@ where
         }
 
         let operation_count = operations.len();
-        EditedText::new(
+        Ok(EditedText::new(
             original_text,
             operations,
             vec![Side::Left; operation_count],
             vec![],
-        )
+        ))
     }
 }
 
@@ -520,6 +546,62 @@ mod tests {
         assert_eq!(operations.apply().text(), expected);
     }
 
+    #[test]
+    fn test_from_diff_length_exceeds_original() {
+        let result = EditedText::from_diff(
+            "hello",
+            vec![
+                10.into(), // too large equal span - should error
+                " world".into(),
+            ],
+            &*BuiltinTokenizer::Word,
+        );
+
+        assert_eq!(
+            result.err(),
+            Some(DiffError::LengthExceedsOriginal {
+                position: 0,
+                requested: 10,
+                available: 5,
+            })
+        );
+    }
+
+    #[test]
+    fn test_from_diff_delete_exceeds_original() {
+        let result = EditedText::from_diff(
+            "hello",
+            vec![(-6).into()], // deletes one character more than exists
+            &*BuiltinTokenizer::Word,
+        );
+
+        assert_eq!(
+            result.err(),
+            Some(DiffError::LengthExceedsOriginal {
+                position: 0,
+                requested: 6,
+                available: 5,
+            })
+        );
+    }
+
+    #[test]
+    fn test_from_diff_valid() {
+        let edited_text = EditedText::from_diff(
+            "hello",
+            vec![
+                5.into(), // exact length
+                " world".into(),
+            ],
+            &*BuiltinTokenizer::Word,
+        )
+        .unwrap();
+
+        let content = edited_text.apply().text();
+
+        assert_eq!(content, "hello world");
+    }
+
     #[cfg(feature = "serde")]
     #[test]
     fn test_changes_deserialisation() {
@@ -542,7 +624,7 @@ mod tests {
         let changes = edited_text.to_diff();
 
         let deserialized_edited_text =
-            EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word);
+            EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();
 
         assert_eq!(deserialized_edited_text.apply().text(), updated);
     }