From a9227fa5bbf5028176d6b9a0acd6b37a9c5c5f47 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Sat, 11 Jan 2025 10:53:05 +0000 Subject: [PATCH] Merge based on file type --- backend/sync_lib/src/lib.rs | 30 +++++++++++++++---- backend/sync_lib/tests/web.rs | 20 +++++++++++-- .../sync_server/src/server/create_document.rs | 16 ++++++---- .../sync_server/src/server/update_document.rs | 9 ++++-- 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/backend/sync_lib/src/lib.rs b/backend/sync_lib/src/lib.rs index 164939ac..b8424e08 100644 --- a/backend/sync_lib/src/lib.rs +++ b/backend/sync_lib/src/lib.rs @@ -19,11 +19,17 @@ pub mod errors; /// Encode binary data for easy transport over HTTP. Inverse of /// `base64_to_bytes`. #[wasm_bindgen(js_name = bytesToBase64)] -pub fn bytes_to_base64(input: &[u8]) -> String { STANDARD.encode(input) } +pub fn bytes_to_base64(input: &[u8]) -> String { + set_panic_hook(); + + STANDARD.encode(input) +} /// Inverse of `bytes_to_base64`. #[wasm_bindgen(js_name = base64ToBytes)] pub fn base64_to_bytes(input: &str) -> Result<Vec<u8>, SyncLibError> { + set_panic_hook(); + STANDARD.decode(input).map_err(SyncLibError::from) } @@ -32,6 +38,8 @@ pub fn base64_to_bytes(input: &str) -> Result<Vec<u8>, SyncLibError> { /// documents is binary. #[wasm_bindgen] pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> { + set_panic_hook(); + if is_binary(parent) || is_binary(left) || is_binary(right) { right.to_vec() } else { @@ -47,6 +55,8 @@ pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> { /// WASM wrapper around `reconcile::reconcile` for text merging. #[wasm_bindgen(js_name = mergeText)] pub fn merge_text(parent: &str, left: &str, right: &str) -> String { + set_panic_hook(); + reconcile::reconcile(parent, left, right) } @@ -54,6 +64,8 @@ pub fn merge_text(parent: &str, left: &str, right: &str) -> String { /// content. 
#[wasm_bindgen(js_name = isBinary)] pub fn is_binary(data: &[u8]) -> bool { + set_panic_hook(); + + if data.iter().any(|&b| b == 0) { // Even though the NUL character is valid in UTF-8, it's highly suspicious in // human-readable text. @@ -63,10 +75,18 @@ pub fn is_binary(data: &[u8]) -> bool { std::str::from_utf8(data).is_err() } -/// Set up panic hook for better error messages in the browser console. -#[cfg(feature = "console_error_panic_hook")] -#[wasm_bindgen(js_name = setPanicHook)] -pub fn set_panic_hook() { +/// We don't want to support merging structured data like JSON, YAML, etc. +#[wasm_bindgen(js_name = isFileTypeMergable)] +pub fn is_file_type_mergable(path_or_file_name: &str) -> bool { + set_panic_hook(); + + let file_extension = path_or_file_name.split('.').last().unwrap_or_default(); + + matches!(file_extension.to_lowercase().as_str(), "md" | "txt") +} + +fn set_panic_hook() { // https://github.com/rustwasm/console_error_panic_hook#readme + #[cfg(feature = "console_error_panic_hook")] console_error_panic_hook::set_once(); } diff --git a/backend/sync_lib/tests/web.rs b/backend/sync_lib/tests/web.rs index ceae695b..ffea18d9 100644 --- a/backend/sync_lib/tests/web.rs +++ b/backend/sync_lib/tests/web.rs @@ -1,5 +1,3 @@ -//! Test suite for the Web and headless browsers. 
- use insta::assert_debug_snapshot; use sync_lib::*; use wasm_bindgen_test::*; @@ -44,3 +42,21 @@ fn test_is_binary() { assert!(is_binary(&[0, 12])); assert!(!is_binary(b"hello")); } + +#[wasm_bindgen_test(unsupported = test)] +fn test_is_binary_empty() { + assert!(!is_binary(b"")); +} + +#[wasm_bindgen_test(unsupported = test)] +fn test_is_file_type_mergable() { + assert!(is_file_type_mergable(".md")); + assert!(is_file_type_mergable("hi.md")); + assert!(is_file_type_mergable("my/path/to/my/document.md")); + assert!(is_file_type_mergable("hi.MD")); + assert!(is_file_type_mergable("my/path/to/my/DOCUMENT.MD")); + + assert!(!is_file_type_mergable(".json")); + assert!(!is_file_type_mergable("HELLO.JSON")); + assert!(!is_file_type_mergable("my/config.yml")); +} diff --git a/backend/sync_server/src/server/create_document.rs b/backend/sync_server/src/server/create_document.rs index 9dda4bcd..cfaeaa0e 100644 --- a/backend/sync_server/src/server/create_document.rs +++ b/backend/sync_server/src/server/create_document.rs @@ -10,7 +10,7 @@ use chrono::{DateTime, Utc}; use log::info; use schemars::JsonSchema; use serde::Deserialize; -use sync_lib::{base64_to_bytes, merge}; +use sync_lib::{base64_to_bytes, is_file_type_mergable, merge}; use super::{ app_state::AppState, @@ -127,11 +127,15 @@ async fn internal_create_document( ))); } - let merged_content = merge( - &[], // the empty string is the first common parent of the two documents, - &existing_version.content, - &content, - ); + let merged_content = if is_file_type_mergable(&sanitized_relative_path) { + merge( + &[], // the empty string is the first common parent of the two documents, + &existing_version.content, + &content, + ) + } else { + content + }; let new_version = StoredDocumentVersion { vault_id, diff --git a/backend/sync_server/src/server/update_document.rs b/backend/sync_server/src/server/update_document.rs index 073744d4..7b1d319e 100644 --- a/backend/sync_server/src/server/update_document.rs +++ 
b/backend/sync_server/src/server/update_document.rs @@ -10,7 +10,7 @@ use chrono::{DateTime, Utc}; use log::info; use schemars::JsonSchema; use serde::Deserialize; -use sync_lib::{base64_to_bytes, merge}; +use sync_lib::{base64_to_bytes, is_file_type_mergable, merge}; use super::{ app_state::AppState, @@ -155,7 +155,12 @@ async fn internal_update_document( ))); } - let merged_content = merge(&parent_document.content, &latest_version.content, &content); + let merged_content = if is_file_type_mergable(&sanitized_relative_path) { + merge(&parent_document.content, &latest_version.content, &content) + } else { + content.clone() + }; + let is_different_from_request_content = merged_content != content; // We can only update the relative path if we're the first one to do so