From e6615463edf3168d6aed5a88dd521d98fd5aed1c Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Mon, 3 Nov 2025 21:36:21 +0000 Subject: [PATCH] Remove is_binary from API --- reconcile-js/src/index.ts | 14 ----------- src/lib.rs | 1 - src/utils.rs | 2 +- src/utils/is_binary.rs | 26 -------------------- src/utils/string_or_nothing.rs | 26 ++++++++++++++++++++ src/wasm.rs | 43 +++++++++++----------------------- tests/wasm.rs | 12 ---------- 7 files changed, 41 insertions(+), 83 deletions(-) delete mode 100644 src/utils/is_binary.rs create mode 100644 src/utils/string_or_nothing.rs diff --git a/reconcile-js/src/index.ts b/reconcile-js/src/index.ts index 247db26..aa07532 100644 --- a/reconcile-js/src/index.ts +++ b/reconcile-js/src/index.ts @@ -4,7 +4,6 @@ import { TextWithCursors as wasmTextWithCursors, SpanWithHistory as wasmSpanWithHistory, reconcileWithHistory as wasmReconcileWithHistory, - isBinary as wasmIsBinary, getCompactDiff as wasmGetCompactDiff, initSync, } from 'reconcile-text'; @@ -272,19 +271,6 @@ export function reconcileWithHistory( }; } -/** - * Check (using heuristics) if the given data is binary or text content. - * - * Only text inputs can be reconciled using the library's functions. - * - * @param data - The data to check for binary content. This should be a Uint8Array. - * @returns True if the data is likely binary, false if it is likely text. 
- */ -export function isBinary(data: Uint8Array): boolean { - init(); - return wasmIsBinary(data); -} - function init() { if (isInitialised) { return; diff --git a/src/lib.rs b/src/lib.rs index 1dd78ff..a760a95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -217,7 +217,6 @@ pub use types::{ cursor_position::CursorPosition, history::History, side::Side, span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors, }; -pub use utils::is_binary::is_binary; #[cfg(feature = "wasm")] pub mod wasm; diff --git a/src/utils.rs b/src/utils.rs index f249825..e6966c6 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,6 @@ pub mod common_prefix_len; pub mod common_suffix_len; pub mod find_longest_prefix_contained_within; -pub mod is_binary; pub mod myers_diff; pub mod string_builder; +pub mod string_or_nothing; diff --git a/src/utils/is_binary.rs b/src/utils/is_binary.rs deleted file mode 100644 index 09bfcf9..0000000 --- a/src/utils/is_binary.rs +++ /dev/null @@ -1,26 +0,0 @@ -/// Heuristically determine if the given data is a binary or a text file's -/// content. -/// -/// Only text inputs can be reconciled using the crate's functions. -#[must_use] -pub fn is_binary(data: &[u8]) -> bool { - if data.contains(&0) { - // Even though the NUL character is valid in UTF-8, it's highly suspicious in - // human-readable text. - return true; - } - - std::str::from_utf8(data).is_err() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_is_binary() { - assert!(is_binary(&[0, 159, 146, 150])); - assert!(is_binary(&[0, 12])); - assert!(!is_binary(b"hello")); - } -} diff --git a/src/utils/string_or_nothing.rs b/src/utils/string_or_nothing.rs new file mode 100644 index 0000000..1ca7d2b --- /dev/null +++ b/src/utils/string_or_nothing.rs @@ -0,0 +1,26 @@ +/// Determine if the given data is a binary or a text file's content. +/// +/// Returns the UTF8 parsed string if it's a text, or `None` if it's likely +/// binary. 
+#[must_use] +pub fn string_or_nothing(data: &[u8]) -> Option<String> { + if data.contains(&0) { + // Even though the NUL character is valid in UTF-8, it's highly suspicious in + // human-readable text. + return None; + } + + std::str::from_utf8(data).map(|s| s.to_string()).ok() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_string_or_nothing() { + assert_eq!(string_or_nothing(&[0, 159, 146, 150]), None); + assert_eq!(string_or_nothing(&[0, 12]), None); + assert_eq!(string_or_nothing(b"hello"), Some("hello".into())); + } +} diff --git a/src/wasm.rs b/src/wasm.rs index 0fd0aca..e2e83f3 100644 --- a/src/wasm.rs +++ b/src/wasm.rs @@ -3,7 +3,10 @@ use core::str; use wasm_bindgen::prelude::*; -use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors}; +use crate::{ + BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors, + utils::string_or_nothing::string_or_nothing, +}; #[global_allocator] static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT; @@ -54,10 +57,6 @@ pub fn reconcile_with_history( /// # Returns /// /// The merged document. -/// -/// # Panics -/// -/// If any of the input documents are not valid UTF-8 strings. 
#[wasm_bindgen(js_name = genericReconcile)] #[must_use] pub fn generic_reconcile( @@ -68,22 +67,17 @@ pub fn generic_reconcile( ) -> Vec<u8> { set_panic_hook(); - if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) { - right.to_vec() + if let (Some(parent), Some(left), Some(right)) = ( + string_or_nothing(parent), + string_or_nothing(left), + string_or_nothing(right), + ) { + crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer) + .apply() + .text() + .into_bytes() } else { - crate::reconcile( - str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"), - &str::from_utf8(left) - .expect("left must be valid UTF-8 because it's not binary") - .into(), - &str::from_utf8(right) - .expect("right must be valid UTF-8 because it's not binary") - .into(), - &*tokenizer, - ) - .apply() - .text() - .into_bytes() + right.to_vec() } } @@ -106,15 +100,6 @@ pub fn get_compact_diff( serde_json::to_string(&change_set).expect("Failed to serialize change set") } -/// Heuristically determine if the given data is a binary or a text file's -/// content. 
-#[wasm_bindgen(js_name = isBinary)] -#[must_use] -pub fn is_binary(data: &[u8]) -> bool { - set_panic_hook(); - crate::is_binary(data) -} - fn set_panic_hook() { // https://github.com/rustwasm/console_error_panic_hook#readme #[cfg(feature = "console_error_panic_hook")] diff --git a/tests/wasm.rs b/tests/wasm.rs index 6a9d556..03a0f1a 100644 --- a/tests/wasm.rs +++ b/tests/wasm.rs @@ -55,13 +55,6 @@ fn test_merge_binary() { ); } -#[wasm_bindgen_test(unsupported = test)] -fn test_is_binary() { - assert!(is_binary(&[0, 159, 146, 150])); - assert!(is_binary(&[0, 12])); - assert!(!is_binary(b"hello")); -} - #[wasm_bindgen_test(unsupported = test)] fn test_get_compact_diff() { let parent = "hello "; @@ -69,8 +62,3 @@ fn test_get_compact_diff() { let result = get_compact_diff(parent, &changed.into(), BuiltinTokenizer::Word); assert_eq!(result, "{\"operations\":[-6,\"world\"],\"cursors\":[]}"); } - -#[wasm_bindgen_test(unsupported = test)] -fn test_is_binary_empty() { - assert!(!is_binary(b"")); -}