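//! WebSocket endpoint for a vault: admission control for pending
//! connections, handshake authentication, catch-up streaming of missed vault
//! updates, and forwarding of live broadcasts and cursor positions.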
use crate::{
|
|
app_state::{
|
|
AppState,
|
|
database::models::VaultId,
|
|
websocket::{
|
|
models::{
|
|
CursorPositionFromServer, WebSocketClientMessage, WebSocketServerMessage,
|
|
WebSocketVaultUpdate,
|
|
},
|
|
utils::{
|
|
get_authenticated_handshake, get_unseen_documents, send_update_over_websocket,
|
|
},
|
|
},
|
|
},
|
|
consts::{
|
|
HANDSHAKE_TIMEOUT, MAX_CURSOR_DOCUMENTS, MAX_CURSORS_PER_DOCUMENT, MAX_RELATIVE_PATH_LEN,
|
|
},
|
|
errors::{SyncServerError, client_error, server_error},
|
|
utils::normalize::normalize,
|
|
};
|
|
use anyhow::Context;
|
|
use axum::{
|
|
extract::{
|
|
Path, State,
|
|
ws::{Message, WebSocket, WebSocketUpgrade},
|
|
},
|
|
response::Response,
|
|
};
|
|
use futures::sink::SinkExt;
|
|
use futures::stream::StreamExt;
|
|
use log::{debug, info, warn};
|
|
use serde::Deserialize;
|
|
|
|
/// Tracks one pending (not yet authenticated) WebSocket connection.
///
/// The guard does not increment the shared counter itself; the caller is
/// expected to have done so before constructing it. Dropping the guard
/// decrements the counter by exactly one, so the slot is released even if the
/// upgrade never completes or authentication fails.
#[must_use = "dropping the guard immediately releases the pending-connection slot"]
struct PendingWsGuard(std::sync::Arc<std::sync::atomic::AtomicUsize>);

impl Drop for PendingWsGuard {
    /// Gives the pending-connection slot back to the shared counter.
    fn drop(&mut self) {
        self.0.fetch_sub(1, std::sync::atomic::Ordering::Relaxed);
    }
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct WebSocketPathParams {
|
|
#[serde(deserialize_with = "normalize")]
|
|
vault_id: VaultId,
|
|
}
|
|
|
|
pub async fn websocket_handler(
|
|
ws: WebSocketUpgrade,
|
|
Path(WebSocketPathParams { vault_id }): Path<WebSocketPathParams>,
|
|
State(state): State<AppState>,
|
|
) -> Result<Response, SyncServerError> {
|
|
let current = state
|
|
.pending_ws_connections
|
|
.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
|
if current >= state.config.server.max_pending_websocket_connections {
|
|
state
|
|
.pending_ws_connections
|
|
.fetch_sub(1, std::sync::atomic::Ordering::Relaxed);
|
|
return Err(client_error(anyhow::anyhow!(
|
|
"Too many pending WebSocket connections"
|
|
)));
|
|
}
|
|
|
|
let guard = PendingWsGuard(state.pending_ws_connections.clone());
|
|
Ok(ws.on_upgrade(move |socket| websocket_wrapped(state, socket, vault_id, guard)))
|
|
}
|
|
|
|
async fn websocket_wrapped(
|
|
state: AppState,
|
|
stream: WebSocket,
|
|
vault_id: VaultId,
|
|
pending_guard: PendingWsGuard,
|
|
) {
|
|
info!("WebSocket connection opened on vault `{vault_id}`");
|
|
|
|
let result = websocket(state, stream, vault_id.clone(), pending_guard).await;
|
|
|
|
if let Err(err) = result {
|
|
debug!("WebSocket connection error on vault `{vault_id}`: {err}");
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::too_many_lines)]
|
|
async fn websocket(
|
|
state: AppState,
|
|
stream: WebSocket,
|
|
vault_id: VaultId,
|
|
pending_guard: PendingWsGuard,
|
|
) -> Result<(), SyncServerError> {
|
|
let (mut sender, mut websocket_receiver) = stream.split();
|
|
|
|
let handshake_msg = tokio::time::timeout(HANDSHAKE_TIMEOUT, websocket_receiver.next())
|
|
.await
|
|
.map_err(|_| client_error(anyhow::anyhow!("WebSocket handshake timed out")))?
|
|
.transpose()
|
|
.map_err(|e| client_error(anyhow::anyhow!("WebSocket error during handshake: {e}")))?;
|
|
|
|
let authed_handshake = get_authenticated_handshake(&state, &vault_id, handshake_msg)?;
|
|
|
|
info!(
|
|
"WebSocket handshake successful for vault `{vault_id}` for `{}`",
|
|
authed_handshake.handshake.device_id
|
|
);
|
|
|
|
// Auth complete — no longer a pending connection.
|
|
drop(pending_guard);
|
|
|
|
let max_clients = state.config.server.max_clients_per_vault;
|
|
|
|
// Atomic subscribe + cursor snapshot, serialized against in-flight
|
|
// broadcasts:
|
|
//
|
|
// 1. Acquire the per-vault broadcast send lock. While we hold it,
|
|
// no `send_document_update` can run, so no broadcast can fire
|
|
// between our subscribe and our cursor snapshot.
|
|
// 2. Subscribe to the broadcast channel (now we'll see every
|
|
// broadcast that fires after we drop the send guard).
|
|
// 3. Snapshot `cursor = max committed vault_update_id`. Because
|
|
// `insert_document_version` holds the same send lock from
|
|
// *before* the commit through *after* the broadcast, every doc
|
|
// visible at this cursor has either (a) already had its
|
|
// broadcast delivered to all then-existing subscribers — and we
|
|
// weren't one of them, so we'll catch it via the snapshot — or
|
|
// (b) had its broadcast contend on the lock we're holding, and
|
|
// will be delivered to us as soon as we drop the guard, with
|
|
// `vault_update_id > cursor`.
|
|
// 4. Drop the send guard so writers can resume broadcasting.
|
|
// 5. Stream the catch-up bounded by the cursor — i.e. only docs
|
|
// with `vault_update_id <= cursor` — exactly once.
|
|
// 6. The send task forwards broadcasts but filters to
|
|
// `vault_update_id > cursor`, so a doc that's both in the
|
|
// catch-up and in a contended-then-released broadcast is
|
|
// delivered exactly once (via the catch-up).
|
|
let send_guard = state.broadcasts.acquire_send_lock(&vault_id).await;
|
|
let mut broadcast_receiver = match state.broadcasts.get_receiver(&vault_id, max_clients) {
|
|
Ok(receiver) => receiver,
|
|
Err(err) => {
|
|
drop(send_guard);
|
|
warn!(
|
|
"Vault `{vault_id}` has reached the maximum number of clients ({max_clients}), rejecting connection from `{}`",
|
|
authed_handshake.handshake.device_id
|
|
);
|
|
if let Err(e) = sender
|
|
.send(Message::Close(Some(axum::extract::ws::CloseFrame {
|
|
code: 4000,
|
|
reason: format!(
|
|
"Vault has reached the maximum number of clients ({max_clients})"
|
|
)
|
|
.into(),
|
|
})))
|
|
.await
|
|
{
|
|
warn!("Failed to send WebSocket close frame: {e}");
|
|
}
|
|
return Err(err);
|
|
}
|
|
};
|
|
let cursor = state
|
|
.database
|
|
.get_max_update_id_in_vault(&vault_id, None)
|
|
.await
|
|
.map_err(server_error)?;
|
|
drop(send_guard);
|
|
|
|
// Catch-up on versions committed while this client was offline,
|
|
// streamed one-at-a-time in ascending `vault_update_id` order, up
|
|
// to the snapshot cursor.
|
|
let unseen_documents = get_unseen_documents(
|
|
&state,
|
|
&vault_id,
|
|
authed_handshake.handshake.last_seen_vault_update_id,
|
|
cursor,
|
|
)
|
|
.await?;
|
|
let unseen_summary: Vec<(i64, bool, String)> = unseen_documents
|
|
.iter()
|
|
.map(|d| (d.vault_update_id, d.is_deleted, d.relative_path.clone()))
|
|
.collect();
|
|
info!(
|
|
"[CATCHUP] vault={vault_id} device={} last_seen={:?} cursor={cursor} unseen_count={} unseen={:?}",
|
|
authed_handshake.handshake.device_id,
|
|
authed_handshake.handshake.last_seen_vault_update_id,
|
|
unseen_summary.len(),
|
|
unseen_summary
|
|
);
|
|
for document in unseen_documents {
|
|
send_update_over_websocket(
|
|
&WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate { document }),
|
|
&mut sender,
|
|
)
|
|
.await?;
|
|
}
|
|
|
|
send_update_over_websocket(
|
|
&WebSocketServerMessage::CursorPositions(CursorPositionFromServer {
|
|
clients: state.cursors.get_cursors(&vault_id).await,
|
|
}),
|
|
&mut sender,
|
|
)
|
|
.await?;
|
|
|
|
let device_id = authed_handshake.handshake.device_id.clone();
|
|
let mut send_task = tokio::spawn(async move {
|
|
loop {
|
|
match broadcast_receiver.recv().await {
|
|
Ok(update) => {
|
|
// Drop messages this device authored because the HTTP
|
|
// response already carried authoritative state back.
|
|
// Delete broadcasts are sent without an origin so the
|
|
// author also receives them — that's the receipt the
|
|
// client needs to drop the doc from its sync queue.
|
|
if Some(&device_id) == update.origin_device_id.as_ref() {
|
|
continue;
|
|
}
|
|
|
|
// Filter out vault updates already covered by the
|
|
// catch-up snapshot. The handshake atomically
|
|
// subscribed and snapshotted `cursor` under the
|
|
// broadcast send lock, so any broadcast with
|
|
// `vault_update_id <= cursor` is one that contended
|
|
// on the lock during our subscribe — its row is
|
|
// already in the catch-up stream and re-delivering
|
|
// it via this channel would duplicate the message.
|
|
// Cursor messages aren't versioned and are always
|
|
// forwarded.
|
|
if let WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate { document }) =
|
|
&update.message
|
|
&& document.vault_update_id <= cursor
|
|
{
|
|
continue;
|
|
}
|
|
|
|
let message = match update.message {
|
|
WebSocketServerMessage::CursorPositions(CursorPositionFromServer {
|
|
clients,
|
|
}) => WebSocketServerMessage::CursorPositions(CursorPositionFromServer {
|
|
clients: clients
|
|
.into_iter()
|
|
.filter(|client| client.device_id != device_id)
|
|
.collect(),
|
|
}),
|
|
WebSocketServerMessage::VaultUpdate(_) => update.message,
|
|
};
|
|
|
|
send_update_over_websocket(&message, &mut sender).await?;
|
|
}
|
|
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
|
|
warn!(
|
|
"WebSocket receiver lagged, dropped {n} messages — disconnecting client to force full resync"
|
|
);
|
|
break;
|
|
}
|
|
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
|
|
}
|
|
}
|
|
|
|
Ok::<(), SyncServerError>(())
|
|
});
|
|
|
|
let device_id = authed_handshake.handshake.device_id.clone();
|
|
let vault_id_clone = vault_id.clone();
|
|
let cursor_manager = state.cursors.clone();
|
|
let mut receive_task = tokio::spawn(async move {
|
|
while let Some(msg) = websocket_receiver.next().await {
|
|
match msg {
|
|
Ok(Message::Text(message)) => {
|
|
let message: WebSocketClientMessage = serde_json::from_str(&message)
|
|
.context("Failed to parse WebSocket message from client")
|
|
.map_err(client_error)?;
|
|
|
|
match message {
|
|
WebSocketClientMessage::Handshake(_) => {
|
|
return Err(client_error(anyhow::anyhow!(
|
|
"Unexpected handshake message"
|
|
)));
|
|
}
|
|
WebSocketClientMessage::CursorPositions(cursors) => {
|
|
let docs = cursors.documents_with_cursors;
|
|
if docs.len() > MAX_CURSOR_DOCUMENTS {
|
|
warn!(
|
|
"Cursor update rejected: {} documents exceeds limit of {MAX_CURSOR_DOCUMENTS}",
|
|
docs.len()
|
|
);
|
|
continue;
|
|
}
|
|
|
|
let valid = docs.iter().all(|doc| {
|
|
doc.cursors.len() <= MAX_CURSORS_PER_DOCUMENT
|
|
&& doc.relative_path.len() <= MAX_RELATIVE_PATH_LEN
|
|
});
|
|
if !valid {
|
|
warn!(
|
|
"Cursor update rejected: a document exceeds cursor or path length limits"
|
|
);
|
|
continue;
|
|
}
|
|
|
|
cursor_manager
|
|
.update_cursors(
|
|
vault_id_clone.clone(),
|
|
authed_handshake.user.name.clone(),
|
|
&device_id,
|
|
docs,
|
|
)
|
|
.await;
|
|
}
|
|
}
|
|
}
|
|
Ok(Message::Close(_)) => break,
|
|
Ok(Message::Binary(_)) => {
|
|
warn!("Received unexpected binary WebSocket message, ignoring");
|
|
}
|
|
Ok(_) => {} // Ping/Pong frames handled by axum
|
|
Err(e) => {
|
|
debug!("WebSocket receive error: {e}");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok::<(), SyncServerError>(())
|
|
});
|
|
|
|
let result: Result<(), SyncServerError> = tokio::select! {
|
|
send_result = &mut send_task => {
|
|
receive_task.abort();
|
|
let _ = receive_task.await;
|
|
match send_result {
|
|
Err(e) => Err(server_error(
|
|
anyhow::Error::from(e).context("WebSocket send task failed"),
|
|
)),
|
|
Ok(inner) => inner,
|
|
}
|
|
},
|
|
receive_result = &mut receive_task => {
|
|
send_task.abort();
|
|
let _ = send_task.await;
|
|
match receive_result {
|
|
Err(e) => Err(server_error(
|
|
anyhow::Error::from(e).context("WebSocket receive task failed"),
|
|
)),
|
|
Ok(inner) => inner,
|
|
}
|
|
},
|
|
};
|
|
|
|
state
|
|
.cursors
|
|
.remove_cursors_of_device(&vault_id, &authed_handshake.handshake.device_id)
|
|
.await;
|
|
|
|
match &result {
|
|
Ok(()) => {
|
|
info!(
|
|
"WebSocket disconnected on vault `{vault_id}` for `{}`",
|
|
authed_handshake.handshake.device_id
|
|
);
|
|
}
|
|
Err(err) => {
|
|
warn!(
|
|
"WebSocket error on vault `{vault_id}` for `{}`: {err}",
|
|
authed_handshake.handshake.device_id
|
|
);
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|