Improve network usage for small text changes (#166)
This commit is contained in:
parent
1da17c462e
commit
be1635c26e
20 changed files with 697 additions and 62 deletions
7
sync-server/Cargo.lock
generated
7
sync-server/Cargo.lock
generated
|
|
@ -1680,9 +1680,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "reconcile-text"
|
||||
version = "0.5.0"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d690c19b0bf6574cd3591d10f20df5aa52d2af95b8dcaacbc86893292ac8c5"
|
||||
checksum = "913440a3c2b90cd3ed3e967660f2bb624b71e8059b9fc86960a5f91bd1e2e353"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ bimap = "0.6.3"
|
|||
ts-rs = { version = "10.1", features = ["uuid-impl", "chrono-impl"] }
|
||||
serde_with = "3.15.1"
|
||||
base64 = "0.22.1"
|
||||
reconcile-text = "0.5.0"
|
||||
reconcile-text = { version = "0.7.1", features = ["serde"] }
|
||||
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
|
|
|
|||
|
|
@ -117,8 +117,12 @@ fn get_authed_routes(app_state: AppState) -> Router<AppState> {
|
|||
get(fetch_latest_document_version::fetch_latest_document_version),
|
||||
)
|
||||
.route(
|
||||
"/vaults/:vault_id/documents/:document_id",
|
||||
put(update_document::update_document),
|
||||
"/vaults/:vault_id/documents/:document_id/binary",
|
||||
put(update_document::update_binary),
|
||||
)
|
||||
.route(
|
||||
"/vaults/:vault_id/documents/:document_id/text",
|
||||
put(update_document::update_text),
|
||||
)
|
||||
.route(
|
||||
"/vaults/:vault_id/documents/:document_id/versions/:version_id",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use axum::body::Bytes;
|
||||
use axum_typed_multipart::{FieldData, TryFromMultipart};
|
||||
use reconcile_text::NumberOrString;
|
||||
use serde::{self, Deserialize};
|
||||
use ts_rs::TS;
|
||||
|
||||
|
|
@ -20,17 +21,28 @@ pub struct CreateDocumentVersion {
|
|||
pub content: FieldData<Bytes>,
|
||||
}
|
||||
|
||||
#[derive(TS, Debug, TryFromMultipart)]
|
||||
#[ts(export)]
|
||||
pub struct UpdateDocumentVersion {
|
||||
#[derive(Debug, TryFromMultipart)]
|
||||
pub struct UpdateBinaryDocumentVersion {
|
||||
pub parent_version_id: VaultUpdateId,
|
||||
pub relative_path: String,
|
||||
|
||||
#[ts(as = "Vec<u8>")]
|
||||
#[form_data(limit = "unlimited")]
|
||||
pub content: FieldData<Bytes>,
|
||||
}
|
||||
|
||||
#[derive(TS, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export)]
|
||||
pub struct UpdateTextDocumentVersion {
|
||||
#[ts(as = "i32")]
|
||||
pub parent_version_id: VaultUpdateId,
|
||||
|
||||
pub relative_path: String,
|
||||
|
||||
#[ts(type = "Array<number | string>")]
|
||||
pub content: Vec<NumberOrString>,
|
||||
}
|
||||
|
||||
#[derive(TS, Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export)]
|
||||
|
|
|
|||
|
|
@ -6,23 +6,25 @@ use axum::{
|
|||
use axum_extra::TypedHeader;
|
||||
use axum_typed_multipart::TypedMultipart;
|
||||
use log::info;
|
||||
use reconcile_text::{BuiltinTokenizer, is_binary, reconcile};
|
||||
use reconcile_text::{BuiltinTokenizer, EditedText, reconcile};
|
||||
use serde::Deserialize;
|
||||
|
||||
use super::{
|
||||
device_id_header::DeviceIdHeader, requests::UpdateDocumentVersion,
|
||||
device_id_header::DeviceIdHeader, requests::UpdateTextDocumentVersion,
|
||||
responses::DocumentUpdateResponse,
|
||||
};
|
||||
use crate::{
|
||||
app_state::{
|
||||
AppState,
|
||||
database::models::{DocumentId, StoredDocumentVersion, VaultId},
|
||||
database::models::{DocumentId, StoredDocumentVersion, VaultId, VaultUpdateId},
|
||||
},
|
||||
config::user_config::User,
|
||||
errors::{SyncServerError, not_found_error, server_error},
|
||||
server::requests::UpdateBinaryDocumentVersion,
|
||||
utils::{
|
||||
dedup_paths::dedup_paths, is_file_type_mergable::is_file_type_mergable,
|
||||
normalize::normalize, sanitize_path::sanitize_path,
|
||||
dedup_paths::dedup_paths, is_binary::is_binary,
|
||||
is_file_type_mergable::is_file_type_mergable, normalize::normalize,
|
||||
sanitize_path::sanitize_path,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -30,13 +32,11 @@ use crate::{
|
|||
pub struct UpdateDocumentPathParams {
|
||||
#[serde(deserialize_with = "normalize")]
|
||||
vault_id: VaultId,
|
||||
|
||||
document_id: DocumentId,
|
||||
}
|
||||
|
||||
#[axum::debug_handler]
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub async fn update_document(
|
||||
pub async fn update_binary(
|
||||
Path(UpdateDocumentPathParams {
|
||||
vault_id,
|
||||
document_id,
|
||||
|
|
@ -44,25 +44,92 @@ pub async fn update_document(
|
|||
Extension(user): Extension<User>,
|
||||
TypedHeader(device_id): TypedHeader<DeviceIdHeader>,
|
||||
State(state): State<AppState>,
|
||||
TypedMultipart(request): TypedMultipart<UpdateDocumentVersion>,
|
||||
TypedMultipart(request): TypedMultipart<UpdateBinaryDocumentVersion>,
|
||||
) -> Result<Json<DocumentUpdateResponse>, SyncServerError> {
|
||||
// No need for a transaction as document versions are immutable
|
||||
let parent_document = state
|
||||
let parent_document = get_parent_document(&state, &vault_id, request.parent_version_id).await?;
|
||||
let content = request.content.contents.to_vec();
|
||||
|
||||
update_document(
|
||||
parent_document,
|
||||
vault_id,
|
||||
document_id,
|
||||
user,
|
||||
device_id,
|
||||
state,
|
||||
&request.relative_path,
|
||||
content,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[axum::debug_handler]
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub async fn update_text(
|
||||
Path(UpdateDocumentPathParams {
|
||||
vault_id,
|
||||
document_id,
|
||||
}): Path<UpdateDocumentPathParams>,
|
||||
Extension(user): Extension<User>,
|
||||
TypedHeader(device_id): TypedHeader<DeviceIdHeader>,
|
||||
State(state): State<AppState>,
|
||||
Json(request): Json<UpdateTextDocumentVersion>,
|
||||
) -> Result<Json<DocumentUpdateResponse>, SyncServerError> {
|
||||
let parent_document = get_parent_document(&state, &vault_id, request.parent_version_id).await?;
|
||||
|
||||
let edited_text = EditedText::from_diff(
|
||||
str::from_utf8(&parent_document.content)
|
||||
.expect("parent must be valid UTF-8 because it's a text document"),
|
||||
request.content,
|
||||
&*BuiltinTokenizer::Word,
|
||||
);
|
||||
|
||||
let content = edited_text.apply().text().into_bytes();
|
||||
|
||||
update_document(
|
||||
parent_document,
|
||||
vault_id,
|
||||
document_id,
|
||||
user,
|
||||
device_id,
|
||||
state,
|
||||
&request.relative_path,
|
||||
content,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn get_parent_document(
|
||||
state: &AppState,
|
||||
vault_id: &VaultId,
|
||||
parent_version_id: VaultUpdateId,
|
||||
) -> Result<StoredDocumentVersion, SyncServerError> {
|
||||
state
|
||||
.database
|
||||
.get_document_version(&vault_id, request.parent_version_id, None)
|
||||
.get_document_version(vault_id, parent_version_id, None)
|
||||
.await
|
||||
.map_err(server_error)?
|
||||
.map_or_else(
|
||||
|| {
|
||||
Err(not_found_error(anyhow!(
|
||||
"Parent version with id `{}` not found",
|
||||
request.parent_version_id
|
||||
"Parent version with id `{parent_version_id}` not found"
|
||||
)))
|
||||
},
|
||||
Ok,
|
||||
)?;
|
||||
)
|
||||
}
|
||||
|
||||
let sanitized_relative_path = sanitize_path(&request.relative_path);
|
||||
#[allow(clippy::too_many_lines, clippy::too_many_arguments)]
|
||||
async fn update_document(
|
||||
parent_document: StoredDocumentVersion,
|
||||
vault_id: VaultId,
|
||||
document_id: DocumentId,
|
||||
user: User,
|
||||
device_id: DeviceIdHeader,
|
||||
state: AppState,
|
||||
relative_path: &str,
|
||||
content: Vec<u8>,
|
||||
) -> Result<Json<DocumentUpdateResponse>, SyncServerError> {
|
||||
let sanitized_relative_path = sanitize_path(relative_path);
|
||||
|
||||
let mut transaction = state
|
||||
.database
|
||||
|
|
@ -102,8 +169,6 @@ pub async fn update_document(
|
|||
)));
|
||||
}
|
||||
|
||||
let content = request.content.contents.to_vec();
|
||||
|
||||
// Return the latest version if the content and path are the same as the latest
|
||||
// version
|
||||
if content == latest_version.content && sanitized_relative_path == latest_version.relative_path
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
pub mod dedup_paths;
|
||||
pub mod is_binary;
|
||||
pub mod is_file_type_mergable;
|
||||
pub mod normalize;
|
||||
pub mod rotating_file_writer;
|
||||
|
|
|
|||
26
sync-server/src/utils/is_binary.rs
Normal file
26
sync-server/src/utils/is_binary.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/// Heuristic check for whether `data` is the content of a binary file rather
/// than a text file.
///
/// Returns `true` if the bytes contain a NUL byte or are not valid UTF-8.
/// Only inputs classified as text can be reconciled.
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    // NUL is a valid UTF-8 code point, but it is highly suspicious in
    // human-readable text, so its presence alone marks the data as binary.
    let has_nul_byte = data.iter().any(|&byte| byte == 0);

    has_nul_byte || std::str::from_utf8(data).is_err()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::is_binary;

    /// Inputs containing a NUL byte are binary; plain ASCII text is not.
    #[test]
    fn test_is_binary() {
        let binary_inputs: [&[u8]; 2] = [&[0, 159, 146, 150], &[0, 12]];
        for input in binary_inputs {
            assert!(is_binary(input));
        }

        assert!(!is_binary(b"hello"));
    }
}
|
||||
|
|
@ -93,6 +93,26 @@ impl RotatingFileWriter {
|
|||
SystemTime::now() >= inner.next_rotation_time
|
||||
}
|
||||
|
||||
fn open_or_create_log_file(inner: &mut RotatingFileWriterInner) -> io::Result<()> {
|
||||
// If we haven't reached rotation time and there's an existing log file, reuse it
|
||||
if !Self::should_rotate(inner)
|
||||
&& let Some(latest_file) =
|
||||
Self::find_latest_log_file(&inner.directory, &inner.file_prefix)
|
||||
{
|
||||
let filepath = inner.directory.join(&latest_file);
|
||||
let file = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&filepath)?;
|
||||
|
||||
inner.current_file = Some(file);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Otherwise, create a new log file with current timestamp
|
||||
Self::rotate(inner)
|
||||
}
|
||||
|
||||
fn rotate(inner: &mut RotatingFileWriterInner) -> io::Result<()> {
|
||||
let timestamp = Local::now().format("%Y-%m-%d_%H-%M-%S");
|
||||
let filename = format!("{}.{}.log", inner.file_prefix, timestamp);
|
||||
|
|
@ -114,7 +134,9 @@ impl Write for RotatingFileWriter {
|
|||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
let mut inner = self.inner.lock().unwrap();
|
||||
|
||||
if inner.current_file.is_none() || Self::should_rotate(&inner) {
|
||||
if inner.current_file.is_none() {
|
||||
Self::open_or_create_log_file(&mut inner)?;
|
||||
} else if Self::should_rotate(&inner) {
|
||||
Self::rotate(&mut inner)?;
|
||||
}
|
||||
|
||||
|
|
@ -328,6 +350,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_restart_behavior() {
|
||||
let temp_dir = std::env::temp_dir().join("test_restart_behavior");
|
||||
let _ = fs::remove_dir_all(&temp_dir);
|
||||
|
||||
// Create initial writer and write some data
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue