This commit is contained in:
Andras Schmelczer 2026-05-04 13:07:18 +01:00
parent 39c5591d36
commit 35877b69da
94 changed files with 3157 additions and 1859 deletions

View file

@ -433,12 +433,19 @@ impl Database {
WriteTransaction::new(&pools.writer, write_guard).await
}
/// Return the latest state of all documents in the vault
/// Return the latest state of all documents in the vault, optionally
/// bounded above by `up_to_vault_update_id` so that the result is a
/// stable snapshot at exactly that cursor (commits past the cursor
/// will be delivered separately via the broadcast channel).
pub async fn get_latest_documents(
&self,
vault: &VaultId,
up_to_vault_update_id: Option<VaultUpdateId>,
connection: Option<&mut SqliteConnection>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
// `i64::MAX` makes the upper bound a no-op for callers that don't
// care about an exact snapshot (they pass `None`).
let upper = up_to_vault_update_id.unwrap_or(i64::MAX);
let query = sqlx::query!(
r#"
select
@ -452,8 +459,10 @@ impl Database {
device_id,
length(content) as "content_size: u64"
from latest_document_versions
where vault_update_id <= ?
order by vault_update_id
"#,
upper,
);
if let Some(conn) = connection {
@ -482,13 +491,20 @@ impl Database {
}
/// Return the latest state of all documents (including deleted) in the
/// vault which have changed since the given update id
/// vault which have changed since the given update id, bounded above
/// by `up_to_vault_update_id` so the catch-up result is a stable
/// snapshot at exactly that cursor. Commits past the cursor will be
/// delivered separately via the broadcast channel.
pub async fn get_latest_documents_since(
&self,
vault: &VaultId,
vault_update_id: VaultUpdateId,
up_to_vault_update_id: Option<VaultUpdateId>,
connection: Option<&mut SqliteConnection>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
// `i64::MAX` makes the upper bound a no-op for callers that don't
// care about an exact snapshot (they pass `None`).
let upper = up_to_vault_update_id.unwrap_or(i64::MAX);
let query = sqlx::query!(
r#"
select
@ -502,10 +518,11 @@ impl Database {
device_id,
length(content) as "content_size: u64"
from latest_document_versions
where vault_update_id > ?
where vault_update_id > ? and vault_update_id <= ?
order by vault_update_id
"#,
vault_update_id
vault_update_id,
upper,
);
if let Some(conn) = connection {

View file

@ -3,10 +3,10 @@ use std::{
sync::{Arc, Mutex as StdMutex},
};
use log::{debug, warn};
use log::{debug, info, warn};
use tokio::sync::{Mutex, broadcast};
use super::models::WebSocketServerMessageWithOrigin;
use super::models::{WebSocketServerMessage, WebSocketServerMessageWithOrigin};
use crate::{app_state::database::models::VaultId, config::server_config::ServerConfig};
#[derive(Debug, Clone)]
@ -46,8 +46,16 @@ impl Broadcasts {
}
/// Remove senders for vaults with no active receivers
fn prune_inactive_vaults(tx_map: &mut TxMap) {
tx_map.retain(|_, sender| sender.receiver_count() > 0);
fn prune_inactive_vaults(tx_map: &mut TxMap) -> Vec<VaultId> {
let mut pruned = Vec::new();
tx_map.retain(|vault, sender| {
let alive = sender.receiver_count() > 0;
if !alive {
pruned.push(vault.clone());
}
alive
});
pruned
}
pub fn get_receiver(
@ -60,10 +68,15 @@ impl Broadcasts {
.tx
.lock()
.expect("broadcasts.tx mutex poisoned — a previous holder panicked");
Self::prune_inactive_vaults(&mut tx_map);
let count_before_prune = tx_map
.get(&vault)
.map_or(0, tokio::sync::broadcast::Sender::receiver_count);
let pruned = Self::prune_inactive_vaults(&mut tx_map);
let pruned_self = pruned.contains(&vault);
let sender = tx_map
.entry(vault)
.entry(vault.clone())
.or_insert_with(|| broadcast::channel(self.broadcast_channel_capacity).0);
// Hold the lock across the count check *and* the subscribe so the
@ -75,7 +88,13 @@ impl Broadcasts {
)));
}
Ok(sender.subscribe())
let receiver = sender.subscribe();
let count_after = sender.receiver_count();
info!(
"[BCAST] get_receiver vault={vault} count_before_prune={count_before_prune} pruned_self={pruned_self} pruned_total={} count_after_subscribe={count_after}",
pruned.len()
);
Ok(receiver)
}
/// Notify all clients (who are subscribed to the vault) about an update.
@ -83,23 +102,46 @@ impl Broadcasts {
/// function return without worrying about task cancellation dropping
/// the broadcast mid-flight. Failures are logged, never propagated.
pub fn send_document_update(&self, vault: VaultId, document: WebSocketServerMessageWithOrigin) {
let vault_update_id = match &document.message {
WebSocketServerMessage::VaultUpdate(u) => Some(u.document.vault_update_id),
WebSocketServerMessage::CursorPositions(_) => None,
};
let is_deleted = match &document.message {
WebSocketServerMessage::VaultUpdate(u) => Some(u.document.is_deleted),
WebSocketServerMessage::CursorPositions(_) => None,
};
let mut tx_map = self
.tx
.lock()
.expect("broadcasts.tx mutex poisoned — a previous holder panicked");
Self::prune_inactive_vaults(&mut tx_map);
let count_before_prune = tx_map
.get(&vault)
.map_or(0, tokio::sync::broadcast::Sender::receiver_count);
let pruned = Self::prune_inactive_vaults(&mut tx_map);
let pruned_self = pruned.contains(&vault);
let sender = tx_map
.entry(vault.clone())
.or_insert_with(|| broadcast::channel(self.broadcast_channel_capacity).0);
if sender.receiver_count() == 0 {
let count_before_send = sender.receiver_count();
if count_before_send == 0 {
info!(
"[BCAST] send_document_update vault={vault} vuid={vault_update_id:?} is_deleted={is_deleted:?} count_before_prune={count_before_prune} pruned_self={pruned_self} count_before_send=0 SKIPPED"
);
debug!("Skipping broadcast, no clients connected for vault `{vault}`");
return;
}
if let Err(e) = sender.send(document) {
warn!("Failed to broadcast to vault `{vault}`: {e}");
let send_result = sender.send(document);
match &send_result {
Ok(n) => info!(
"[BCAST] send_document_update vault={vault} vuid={vault_update_id:?} is_deleted={is_deleted:?} count_before_prune={count_before_prune} pruned_self={pruned_self} count_before_send={count_before_send} SENT delivered_to={n}"
),
Err(e) => warn!(
"[BCAST] send_document_update vault={vault} vuid={vault_update_id:?} is_deleted={is_deleted:?} count_before_prune={count_before_prune} pruned_self={pruned_self} count_before_send={count_before_send} FAILED err={e}"
),
}
}
}

View file

@ -44,21 +44,29 @@ pub fn get_authenticated_handshake(
}
}
/// Stream the documents the client missed while offline, bounded above
/// by `up_to_vault_update_id` so the catch-up is a stable snapshot at
/// exactly that cursor. The WebSocket handshake atomically subscribes
/// to the broadcast channel and snapshots this cursor under the per-
/// vault send lock; commits past the cursor are then delivered solely
/// through the broadcast channel (filtered by the same cursor on the
/// receive side), so every committed update is delivered exactly once.
pub async fn get_unseen_documents(
state: &AppState,
vault_id: &VaultId,
last_seen_vault_update_id: Option<VaultUpdateId>,
up_to_vault_update_id: VaultUpdateId,
) -> Result<Vec<DocumentVersionWithoutContent>, SyncServerError> {
if let Some(update_id) = last_seen_vault_update_id {
state
.database
.get_latest_documents_since(vault_id, update_id, None)
.get_latest_documents_since(vault_id, update_id, Some(up_to_vault_update_id), None)
.await
.map_err(server_error)
} else {
state
.database
.get_latest_documents(vault_id, None)
.get_latest_documents(vault_id, Some(up_to_vault_update_id), None)
.await
.map_err(server_error)
}

View file

@ -11,7 +11,9 @@ use super::device_id_header::DeviceIdHeader;
use crate::{
app_state::{
AppState,
database::models::{DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId},
database::models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId,
},
},
config::user_config::User,
errors::{SyncServerError, not_found_error, server_error, write_transaction_error},

View file

@ -37,13 +37,13 @@ pub async fn fetch_latest_documents(
let documents = if let Some(since_update_id) = since_update_id {
state
.database
.get_latest_documents_since(&vault_id, since_update_id, None)
.get_latest_documents_since(&vault_id, since_update_id, None, None)
.await
.map_err(server_error)
} else {
state
.database
.get_latest_documents(&vault_id, None)
.get_latest_documents(&vault_id, None, None)
.await
.map_err(server_error)
}?;

View file

@ -284,17 +284,14 @@ pub async fn update_document(
// reconcile above, independent of which rename wins. A missing
// relative_path means "keep current path" (content-only edit).
let new_relative_path = match sanitized_relative_path.as_deref() {
Some(requested) if parent_relative_path == latest_version.relative_path
&& requested != latest_version.relative_path =>
Some(requested)
if parent_relative_path == latest_version.relative_path
&& requested != latest_version.relative_path =>
{
let new_path = find_first_available_path(
&vault_id,
requested,
&state.database,
&mut transaction,
)
.await
.map_err(server_error)?;
let new_path =
find_first_available_path(&vault_id, requested, &state.database, &mut transaction)
.await
.map_err(server_error)?;
if new_path != requested {
info!(

View file

@ -110,10 +110,37 @@ async fn websocket(
drop(pending_guard);
let max_clients = state.config.server.max_clients_per_vault;
// Atomic subscribe + cursor snapshot, serialized against in-flight
// broadcasts:
//
// 1. Acquire the per-vault broadcast send lock. While we hold it,
// no `send_document_update` can run, so no broadcast can fire
// between our subscribe and our cursor snapshot.
// 2. Subscribe to the broadcast channel (now we'll see every
// broadcast that fires after we drop the send guard).
// 3. Snapshot `cursor = max committed vault_update_id`. Because
// `insert_document_version` holds the same send lock from
// *before* the commit through *after* the broadcast, every doc
// visible at this cursor has either (a) already had its
// broadcast delivered to all then-existing subscribers — and we
// weren't one of them, so we'll catch it via the snapshot — or
// (b) had its broadcast contend on the lock we're holding, and
// will be delivered to us as soon as we drop the guard, with
// `vault_update_id > cursor`.
// 4. Drop the send guard so writers can resume broadcasting.
// 5. Stream the catch-up bounded by the cursor — i.e. only docs
// with `vault_update_id <= cursor` — exactly once.
// 6. The send task forwards broadcasts but filters to
// `vault_update_id > cursor`, so a doc that's both in the
// catch-up and in a contended-then-released broadcast is
// delivered exactly once (via the catch-up).
let send_guard = state.broadcasts.acquire_send_lock(&vault_id).await;
let mut broadcast_receiver = match state.broadcasts.get_receiver(vault_id.clone(), max_clients)
{
Ok(receiver) => receiver,
Err(err) => {
drop(send_guard);
warn!(
"Vault `{vault_id}` has reached the maximum number of clients ({max_clients}), rejecting connection from `{}`",
authed_handshake.handshake.device_id
@ -133,15 +160,34 @@ async fn websocket(
return Err(err);
}
};
let cursor = state
.database
.get_max_update_id_in_vault(&vault_id, None)
.await
.map_err(server_error)?;
drop(send_guard);
// Catch-up on versions committed while this client was offline,
// streamed one-at-a-time in ascending `vault_update_id` order
// streamed one-at-a-time in ascending `vault_update_id` order, up
// to the snapshot cursor.
let unseen_documents = get_unseen_documents(
&state,
&vault_id,
authed_handshake.handshake.last_seen_vault_update_id,
cursor,
)
.await?;
let unseen_summary: Vec<(i64, bool, String)> = unseen_documents
.iter()
.map(|d| (d.vault_update_id, d.is_deleted, d.relative_path.clone()))
.collect();
info!(
"[CATCHUP] vault={vault_id} device={} last_seen={:?} cursor={cursor} unseen_count={} unseen={:?}",
authed_handshake.handshake.device_id,
authed_handshake.handshake.last_seen_vault_update_id,
unseen_summary.len(),
unseen_summary
);
for document in unseen_documents {
send_update_over_websocket(
&WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate { document }),
@ -172,6 +218,23 @@ async fn websocket(
continue;
}
// Filter out vault updates already covered by the
// catch-up snapshot. The handshake atomically
// subscribed and snapshotted `cursor` under the
// broadcast send lock, so any broadcast with
// `vault_update_id <= cursor` is one that contended
// on the lock during our subscribe — its row is
// already in the catch-up stream and re-delivering
// it via this channel would duplicate the message.
// Cursor messages aren't versioned and are always
// forwarded.
if let WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate { document }) =
&update.message
&& document.vault_update_id <= cursor
{
continue;
}
let message = match update.message {
WebSocketServerMessage::CursorPositions(CursorPositionFromServer {
clients,