Improve compact diff API (#24)

* Remove is_binary from API

* Format

* Rename file

* Test with more feature combinations

* Don't depend on serde for wasm

* Fix lint & tests

* Don't unwrap to MAX number

* Expose undiff to JS

* Add undiff tests

* Lint

* Change name
This commit is contained in:
Andras Schmelczer 2025-11-16 15:43:19 +00:00 committed by GitHub
parent 6191d1adb3
commit e85eb485e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 430 additions and 424 deletions

View file

@ -3,7 +3,7 @@ use core::str;
use wasm_bindgen::prelude::*;
use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors};
use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
@ -32,6 +32,7 @@ pub fn reconcile_with_history(
tokenizer: BuiltinTokenizer,
) -> TextWithCursorsAndHistory {
set_panic_hook();
let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
let text_with_cursors = reconciled.apply();
@ -54,10 +55,6 @@ pub fn reconcile_with_history(
/// # Returns
///
/// The merged document.
///
/// # Panics
///
/// If any of the input documents are not valid UTF-8 strings.
#[wasm_bindgen(js_name = genericReconcile)]
#[must_use]
pub fn generic_reconcile(
@ -68,51 +65,56 @@ pub fn generic_reconcile(
) -> Vec<u8> {
set_panic_hook();
if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
right.to_vec()
if let (Some(parent), Some(left), Some(right)) = (
string_or_nothing(parent),
string_or_nothing(left),
string_or_nothing(right),
) {
crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer)
.apply()
.text()
.into_bytes()
} else {
crate::reconcile(
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
&str::from_utf8(left)
.expect("left must be valid UTF-8 because it's not binary")
.into(),
&str::from_utf8(right)
.expect("right must be valid UTF-8 because it's not binary")
.into(),
&*tokenizer,
)
.apply()
.text()
.into_bytes()
right.to_vec()
}
}
/// WASM wrapper around getting a compact diff representation as a JSON string
/// WASM wrapper around getting a compact diff representation of two texts as a
/// list of numbers and strings.
#[wasm_bindgen(js_name = diff)]
#[must_use]
pub fn diff(parent: &str, changed: &TextWithCursors, tokenizer: BuiltinTokenizer) -> Vec<JsValue> {
set_panic_hook();
let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
edited_text
.to_diff()
.into_iter()
.map(std::convert::Into::into)
.collect()
}
/// Inverse of `diff`, applies a compact diff representation to a parent text
///
/// # Panics
///
/// If serialization to JSON fails which should not happen
#[wasm_bindgen(js_name = getCompactDiff)]
/// Panics if the diff format is invalid or there's an integer overflow when
/// applying the diff.
#[wasm_bindgen(js_name = undiff)]
#[must_use]
pub fn get_compact_diff(
parent: &str,
changed: &TextWithCursors,
tokenizer: BuiltinTokenizer,
) -> String {
pub fn undiff(parent: &str, diff: Vec<JsValue>, tokenizer: BuiltinTokenizer) -> String {
set_panic_hook();
let edited_text = crate::EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
let change_set = edited_text.to_change_set();
serde_json::to_string(&change_set).expect("Failed to serialize change set")
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
set_panic_hook();
crate::is_binary(data)
EditedText::from_diff(
parent,
diff.into_iter()
.map(std::convert::TryInto::try_into)
.collect::<Result<_, _>>()
.expect("Invalid diff format"),
&*tokenizer,
)
.apply()
.text()
}
fn set_panic_hook() {
@ -140,3 +142,30 @@ impl TextWithCursorsAndHistory {
#[must_use]
pub fn history(&self) -> Vec<SpanWithHistory> { self.history.clone() }
}
/// Returns the UTF8 parsed string if it's a text, or `None` if it's likely
/// binary.
#[must_use]
fn string_or_nothing(data: &[u8]) -> Option<String> {
if data.contains(&0) {
// Even though the NUL character is valid in UTF-8, it's highly suspicious in
// human-readable text.
return None;
}
std::str::from_utf8(data)
.map(std::borrow::ToOwned::to_owned)
.ok()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_or_nothing() {
assert_eq!(string_or_nothing(&[0, 159, 146, 150]), None);
assert_eq!(string_or_nothing(&[0, 12]), None);
assert_eq!(string_or_nothing(b"hello"), Some("hello".into()));
}
}