Remove is_binary from API

This commit is contained in:
Andras Schmelczer 2025-11-03 21:36:21 +00:00
parent 6191d1adb3
commit e6615463ed
7 changed files with 41 additions and 83 deletions

View file

@ -4,7 +4,6 @@ import {
TextWithCursors as wasmTextWithCursors,
SpanWithHistory as wasmSpanWithHistory,
reconcileWithHistory as wasmReconcileWithHistory,
isBinary as wasmIsBinary,
getCompactDiff as wasmGetCompactDiff,
initSync,
} from 'reconcile-text';
@ -272,19 +271,6 @@ export function reconcileWithHistory(
};
}
/**
 * Check (using heuristics) if the given data is binary or text content.
 *
 * Only text inputs can be reconciled using the library's functions, so this
 * can be used to screen data before attempting a merge.
 *
 * Calls `init()` first and then delegates the actual check to the `isBinary`
 * export of the `reconcile-text` WASM module.
 *
 * @param data - The raw bytes to inspect. This should be a Uint8Array.
 * @returns True if the data is likely binary, false if it is likely text.
 */
export function isBinary(data: Uint8Array): boolean {
// `init()` returns early when `isInitialised` is already set, so calling it on every entry is fine.
init();
return wasmIsBinary(data);
}
function init() {
if (isInitialised) {
return;

View file

@ -217,7 +217,6 @@ pub use types::{
cursor_position::CursorPosition, history::History, side::Side,
span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
};
pub use utils::is_binary::is_binary;
#[cfg(feature = "wasm")]
pub mod wasm;

View file

@ -1,6 +1,6 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod is_binary;
pub mod myers_diff;
pub mod string_builder;
pub mod string_or_nothing;

View file

@ -1,26 +0,0 @@
/// Heuristically classify `data` as binary (`true`) or text (`false`).
///
/// The data is considered binary when it is not valid UTF-8 or when it
/// contains a NUL byte. Everything else, including empty input, is considered
/// text.
///
/// Only text inputs can be reconciled using the crate's functions.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    match std::str::from_utf8(data) {
        // NUL is a legal UTF-8 code point, but it practically never shows up
        // in human-readable text, so its presence is treated as a sign of
        // binary content.
        Ok(text) => text.contains('\0'),
        Err(_) => true,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs containing a NUL byte are binary; plain ASCII text is not.
    #[test]
    fn test_is_binary() {
        let binary_inputs: [&[u8]; 2] = [&[0, 159, 146, 150], &[0, 12]];
        for input in binary_inputs.iter() {
            assert!(is_binary(input));
        }

        assert!(!is_binary(b"hello"));
    }
}

View file

@ -0,0 +1,26 @@
/// Decode `data` as text, or report that it looks like binary content.
///
/// Returns `Some` holding an owned copy of the decoded string when `data` is
/// valid UTF-8 and contains no NUL byte. Returns `None` otherwise, meaning the
/// data is likely binary.
#[must_use]
pub fn string_or_nothing(data: &[u8]) -> Option<String> {
    std::str::from_utf8(data)
        .ok()
        // NUL is a legal UTF-8 code point, but it practically never shows up
        // in human-readable text, so such input is rejected as binary.
        .filter(|text| !text.contains('\0'))
        .map(str::to_owned)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs containing a NUL byte yield `None`; plain ASCII text is
    /// returned unchanged.
    #[test]
    fn test_string_or_nothing() {
        assert!(string_or_nothing(&[0, 159, 146, 150]).is_none());
        assert!(string_or_nothing(&[0, 12]).is_none());

        let decoded = string_or_nothing(b"hello");
        assert_eq!(decoded.as_deref(), Some("hello"));
    }
}

View file

@ -3,7 +3,10 @@ use core::str;
use wasm_bindgen::prelude::*;
use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors};
use crate::{
BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors,
utils::string_or_nothing::string_or_nothing,
};
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
@ -54,10 +57,6 @@ pub fn reconcile_with_history(
/// # Returns
///
/// The merged document.
///
/// # Panics
///
/// If any of the input documents are not valid UTF-8 strings.
#[wasm_bindgen(js_name = genericReconcile)]
#[must_use]
pub fn generic_reconcile(
@ -68,22 +67,17 @@ pub fn generic_reconcile(
) -> Vec<u8> {
set_panic_hook();
if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
right.to_vec()
if let (Some(parent), Some(left), Some(right)) = (
string_or_nothing(parent),
string_or_nothing(left),
string_or_nothing(right),
) {
crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer)
.apply()
.text()
.into_bytes()
} else {
crate::reconcile(
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
&str::from_utf8(left)
.expect("left must be valid UTF-8 because it's not binary")
.into(),
&str::from_utf8(right)
.expect("right must be valid UTF-8 because it's not binary")
.into(),
&*tokenizer,
)
.apply()
.text()
.into_bytes()
right.to_vec()
}
}
@ -106,15 +100,6 @@ pub fn get_compact_diff(
serde_json::to_string(&change_set).expect("Failed to serialize change set")
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
///
/// Exported to JavaScript as `isBinary`. Calls `set_panic_hook()` and then
/// delegates the check to [`crate::is_binary`].
///
/// # Returns
///
/// `true` if the data is likely binary, `false` if it is likely text.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
set_panic_hook();
crate::is_binary(data)
}
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]

View file

@ -55,13 +55,6 @@ fn test_merge_binary() {
);
}
// Both NUL-containing inputs must be classified as binary; plain ASCII text
// must not.
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary() {
assert!(is_binary(&[0, 159, 146, 150]));
assert!(is_binary(&[0, 12]));
assert!(!is_binary(b"hello"));
}
#[wasm_bindgen_test(unsupported = test)]
fn test_get_compact_diff() {
let parent = "hello ";
@ -69,8 +62,3 @@ fn test_get_compact_diff() {
let result = get_compact_diff(parent, &changed.into(), BuiltinTokenizer::Word);
assert_eq!(result, "{\"operations\":[-6,\"world\"],\"cursors\":[]}");
}
// Edge case: empty input must be classified as text, not binary.
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary_empty() {
assert!(!is_binary(b""));
}