Extract reconcile (#85)

This commit is contained in:
Andras Schmelczer 2025-07-13 11:06:42 +01:00 committed by GitHub
parent 75b020146a
commit bb0e44f06f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
141 changed files with 294 additions and 36720 deletions

View file

@ -1,6 +0,0 @@
target
Dockerfile
.dockerignore
databases
sync_lib/pkg
*.yml

View file

@ -1 +0,0 @@
DATABASE_URL=sqlite://db.sqlite3

3324
backend/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,76 +0,0 @@
[workspace]
resolver = "2"
members = [
"reconcile",
"sync_server",
"sync_lib"
]
[workspace.package]
rust-version = "1.83"
authors = ["Andras Schmelczer <andras@schmelczer.dev>"]
edition = "2024"
license = "MIT"
repository = "https://github.com/schmelczer/vault-link"
version = "0.4.0"
[workspace.dependencies]
serde = { version = "1.0.219", default-features = false, features = ["derive"] }
thiserror = { version = "2.0.12", default-features = false }
[profile.release]
codegen-units = 1
lto = true
opt-level = 3
strip="debuginfo" # Keep some info for better panics
[workspace.lints.rust]
unsafe_code = "forbid"
rust_2018_idioms = { level = "warn", priority = -1 }
missing_debug_implementations = "warn"
[workspace.lints.clippy]
await_holding_lock = "warn"
dbg_macro = "warn"
empty_enum = "warn"
enum_glob_use = "warn"
exit = "warn"
filter_map_next = "warn"
fn_params_excessive_bools = "warn"
if_let_mutex = "warn"
imprecise_flops = "warn"
inefficient_to_string = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
match_wildcard_for_single_variants = "warn"
mem_forget = "warn"
needless_borrow = "warn"
needless_continue = "warn"
option_option = "warn"
rest_pat_in_fully_bound_structs = "warn"
str_to_string = "warn"
suboptimal_flops = "warn"
todo = "warn"
uninlined_format_args = "warn"
unnested_or_patterns = "warn"
unused_self = "warn"
verbose_file_reads = "warn"
large_stack_arrays = { level = "allow", priority = 1 } # https://github.com/rust-lang/rust-clippy/issues/13774
# TODO: fix these
cast_possible_truncation = { level = "allow", priority = 1 }
cast_sign_loss = { level = "allow", priority = 1 }
cast_possible_wrap = { level = "allow", priority = 1 }
# Silly lints
implicit_return = { level = "allow", priority = 1 }
question_mark_used = { level = "allow", priority = 1 }
struct_field_names = { level = "allow", priority = 1 }
single_char_lifetime_names = { level = "allow", priority = 1 }
single_call_fn = { level = "allow", priority = 1 }
similar_names = { level = "allow", priority = 1 }
missing_docs_in_private_items = { level = "allow", priority = 1 }
pedantic = { level = "warn", priority = 0 }

View file

@ -8,9 +8,9 @@ RUN cargo install sqlx-cli
COPY . .
RUN sqlx database create --database-url sqlite://db.sqlite3
RUN sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
RUN sqlx migrate run --source sync-server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
RUN cargo build --package sync_server --release --target x86_64-unknown-linux-musl
RUN cargo build --release --target x86_64-unknown-linux-musl
# Runtime image
FROM alpine:3.22.0

View file

@ -1,23 +0,0 @@
[package]
name = "reconcile"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[dependencies]
serde = { version = "1.0.219", optional = true, features = ["derive"] }
[features]
serde = [ "dep:serde" ]
[dev-dependencies]
insta = "1.42.2"
pretty_assertions = "1.4.1"
serde = { version = "1.0.219", features = ["derive"] }
serde_yaml ="0.9.34"
test-case = "3.3.1"
[lints]
workspace = true

View file

@ -1,2 +0,0 @@
pub mod myers;
pub mod raw_operation;

View file

@ -1,357 +0,0 @@
//! Taken from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs>
//!
//! Myers' diff algorithm.
//!
//! * time: `O((N+M)D)`
//! * space `O(N+M)`
//!
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
//! describing it.
//!
//! The implementation of this algorithm is based on the implementation by
//! Brandon Williams.
//!
//! # Heuristics
//!
//! At present this implementation of Myers' does not implement any more
//! advanced heuristics that would solve some pathological cases. For instance
//! passing two large and completely distinct sequences to the algorithm will
//! make it spin without making reasonable progress.
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
use std::{
ops::{Index, IndexMut, Range},
vec,
};
use super::raw_operation::RawOperation;
use crate::{
tokenizer::token::Token,
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
};
/// Diffs two token sequences with Myers' algorithm.
///
/// The whole of `old` is compared against the whole of `new`. Work is
/// delegated to `conquer`, which appends operations to the result in order.
///
/// Every returned `RawOperation` wraps exactly one token; this is checked
/// with a `debug_assert!` before returning.
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    // Both search directions share the same capacity for their scratch buffers.
    let capacity = (old.len() + new.len()).div_ceil(2) + 1;
    let mut backward = V::new(capacity);
    let mut forward = V::new(capacity);

    let mut operations: Vec<RawOperation<T>> = Vec::new();
    let whole_old = 0..old.len();
    let whole_new = 0..new.len();
    conquer(
        old,
        whole_old,
        new,
        whole_new,
        &mut forward,
        &mut backward,
        &mut operations,
    );

    debug_assert!(
        operations.iter().all(|op| op.tokens().len() == 1),
        "All operations should be of length 1"
    );
    operations
}
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
// and then a possibly empty sequence of diagonal edges called a snake.
/// Furthest-reaching endpoints of the `D-paths`, addressed by diagonal `k`.
///
/// For an endpoint `(x, y)` on diagonal `k` only `x` is stored, because `y`
/// can be recomputed as `x - k`.
///
/// `k` can be negative, so it cannot index a `Vec` directly. `V` therefore
/// wraps a `Vec` together with an `offset` that is added to `k` before every
/// access, mapping negative diagonals onto non-negative positions.
#[derive(Debug)]
struct V {
    offset: isize,
    v: Vec<usize>, // Look into initializing this to -1 and storing isize
}

impl V {
    /// Creates `2 * max_d` zeroed slots and uses `max_d` as the offset.
    fn new(max_d: usize) -> Self {
        let offset = max_d as isize;
        let v = vec![0; 2 * max_d];
        Self { offset, v }
    }

    /// Number of slots in the backing vector.
    fn len(&self) -> usize {
        self.v.len()
    }

    /// Translates diagonal `k` into a position in the backing vector.
    fn slot(&self, k: isize) -> usize {
        (k + self.offset) as usize
    }
}

impl Index<isize> for V {
    type Output = usize;

    fn index(&self, index: isize) -> &Self::Output {
        let slot = self.slot(index);
        &self.v[slot]
    }
}

impl IndexMut<isize> for V {
    fn index_mut(&mut self, index: isize) -> &mut Self::Output {
        let slot = self.slot(index);
        &mut self.v[slot]
    }
}
/// Splits `range` into `start..at` and `at..end`.
///
/// `at` is not validated against the bounds of `range`.
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
    let Range { start, end } = range;
    let head = start..at;
    let tail = at..end;
    (head, tail)
}
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
/// a snake has a start and end point (and it is possible for a snake to have
/// a length of zero, meaning the start and end points are the same) however
/// we do not need the end point which is why it's not implemented here.
///
/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
/// some of which may be empty. The divide step requires finding the ceil(D/2) +
/// 1 or middle snake of an optimal D-path. The idea for doing so is to
/// simultaneously run the basic algorithm in both the forward and reverse
/// directions until furthest reaching forward and reverse paths starting at
/// opposing corners 'overlap'.
///
/// Returns `Some((x, y))` for the point at which the caller should split the
/// problem, expressed as absolute indices into `old` and `new` (the starts of
/// `old_range` and `new_range` are added back before returning). Returns
/// `None` when no overlap is detected within `d_max` rounds.
///
/// `vf` and `vb` are the scratch buffers of the forward and backward search.
/// The function asserts that each of them has at least `d_max` slots.
fn find_middle_snake<T>(
old: &[Token<T>],
old_range: Range<usize>,
new: &[Token<T>],
new_range: Range<usize>,
vf: &mut V,
vb: &mut V,
) -> Option<(usize, usize)>
where
T: PartialEq + Clone + std::fmt::Debug,
{
// All `x`/`y` coordinates below are relative to the starts of the ranges.
let n = old_range.len();
let m = new_range.len();
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
// `delta` is odd or even.
let delta = n as isize - m as isize;
let odd = delta & 1 == 1;
// The initial point at (0, -1)
vf[1] = 0;
// The initial point at (N, M+1)
vb[1] = 0;
// Upper bound on the number of rounds `d` that are tried.
let d_max = (n + m).div_ceil(2) + 1;
assert!(vf.len() >= d_max);
assert!(vb.len() >= d_max);
for d in 0..d_max as isize {
// Forward path
for k in (-d..=d).rev().step_by(2) {
// Pick the neighbouring diagonal to extend from: `k + 1` keeps `x`,
// `k - 1` advances `x` by one.
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
vf[k + 1]
} else {
vf[k - 1] + 1
};
let y = (x as isize - k) as usize;
// The coordinate of the start of a snake
let (x0, y0) = (x, y);
// While these sequences are identical, keep moving through the
// graph with no cost
if x < old_range.len() && y < new_range.len() {
let advance = common_prefix_len(
old,
old_range.start + x..old_range.end,
new,
new_range.start + y..new_range.end,
);
x += advance;
}
// This is the new best x value
vf[k] = x;
// Only check for connections from the forward search when N - M is
// odd and when there is a reciprocal k line coming from the other
// direction.
if odd && (k - delta).abs() <= (d - 1) {
// TODO optimize this so we don't have to compare against n
if vf[k] + vb[-(k - delta)] >= n {
// Return the snake
return Some((x0 + old_range.start, y0 + new_range.start));
}
}
}
// Backward path
for k in (-d..=d).rev().step_by(2) {
// Same neighbour selection as in the forward path, on the backward
// buffer. Here `x` and `y` count positions from the end of the ranges.
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
vb[k + 1]
} else {
vb[k - 1] + 1
};
let mut y = (x as isize - k) as usize;
// Extend along the common suffix of the not yet consumed parts.
if x < n && y < m {
let advance = common_suffix_len(
old,
old_range.start..old_range.start + n - x,
new,
new_range.start..new_range.start + m - y,
);
x += advance;
y += advance;
}
// This is the new best x value
vb[k] = x;
// Mirror of the forward check, used when N - M is even.
if !odd && (k - delta).abs() <= d {
// TODO optimize this so we don't have to compare against n
if vb[k] + vf[-(k - delta)] >= n {
// Return the snake
return Some((n - x + old_range.start, m - y + new_range.start));
}
}
}
// TODO: Maybe there's an opportunity to optimize and bail early?
}
None
}
/// Appends one single-token operation per element of `tokens` to `result`,
/// using `make` to choose the operation kind.
fn push_single_token_ops<T>(
    result: &mut Vec<RawOperation<T>>,
    tokens: &[Token<T>],
    make: fn(Vec<Token<T>>) -> RawOperation<T>,
) where
    T: PartialEq + Clone + std::fmt::Debug,
{
    result.extend(tokens.iter().map(|token| make(vec![token.clone()])));
}

/// Recursive step of the diff: emits the operations that turn
/// `old[old_range]` into `new[new_range]` and appends them to `result`.
///
/// The common prefix and suffix of the two ranges are emitted as `Equal`
/// operations. What remains is either trivially a run of deletions or
/// insertions, or is split at the point found by `find_middle_snake` and
/// handled recursively. If no split point is found, everything left in `old`
/// is deleted and everything left in `new` is inserted.
fn conquer<T>(
    old: &[Token<T>],
    mut old_range: Range<usize>,
    new: &[Token<T>],
    mut new_range: Range<usize>,
    vf: &mut V,
    vb: &mut V,
    result: &mut Vec<RawOperation<T>>,
) where
    T: PartialEq + Clone + std::fmt::Debug,
{
    // Shared prefix: emitted right away, then trimmed off both ranges.
    let prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
    if prefix_len > 0 {
        let shared = &old[old_range.start..old_range.start + prefix_len];
        push_single_token_ops(result, shared, RawOperation::Equal);
    }
    old_range.start += prefix_len;
    new_range.start += prefix_len;

    // Shared suffix: trimmed off now, emitted after the middle part.
    let suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
    old_range.end -= suffix_len;
    new_range.end -= suffix_len;
    let suffix_start = old_range.end;

    match (old_range.is_empty(), new_range.is_empty()) {
        // Nothing left between prefix and suffix.
        (true, true) => {}
        // Only old tokens remain: they were all removed.
        (false, true) => {
            push_single_token_ops(result, &old[old_range.clone()], RawOperation::Delete);
        }
        // Only new tokens remain: they were all added.
        (true, false) => {
            push_single_token_ops(result, &new[new_range.clone()], RawOperation::Insert);
        }
        (false, false) => {
            let split = find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb);
            match split {
                Some((x_start, y_start)) => {
                    let (old_head, old_tail) = split_at(old_range, x_start);
                    let (new_head, new_tail) = split_at(new_range, y_start);
                    conquer(old, old_head, new, new_head, vf, vb, result);
                    conquer(old, old_tail, new, new_tail, vf, vb, result);
                }
                None => {
                    // No split point: replace the old part wholesale.
                    push_single_token_ops(
                        result,
                        &old[old_range.start..old_range.end],
                        RawOperation::Delete,
                    );
                    push_single_token_ops(
                        result,
                        &new[new_range.start..new_range.end],
                        RawOperation::Insert,
                    );
                }
            }
        }
    }

    if suffix_len > 0 {
        let shared = &old[suffix_start..suffix_start + suffix_len];
        push_single_token_ops(result, shared, RawOperation::Equal);
    }
}
// Unit tests for `diff`. Except for the empty case, results are compared
// against stored insta snapshots.
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use super::*;
// Two empty inputs produce no operations.
#[test]
fn test_empty_diff() {
let old: Vec<Token<String>> = vec![];
let new: Vec<Token<String>> = vec![];
let result = diff(&old, &new);
assert_eq!(result.len(), 0);
}
// Diffing a sequence against itself.
#[test]
fn test_identical_content() {
let content = vec!["a".into(), "b".into(), "c".into()];
let result = diff(&content, &content);
assert_debug_snapshot!(result);
}
// Empty `old`, non-empty `new`.
#[test]
fn test_insert_only() {
let old: Vec<Token<String>> = vec![];
let new: Vec<Token<String>> = vec!["a".into(), "b".into()];
let result = diff(&old, &new);
assert_debug_snapshot!(result);
}
// Non-empty `old`, empty `new`.
#[test]
fn test_delete_only() {
let old = vec!["a".into(), "b".into()];
let new: Vec<Token<String>> = vec![];
let result = diff(&old, &new);
assert_debug_snapshot!(result);
}
// Inputs that share their first and last token and differ in between.
#[test]
fn test_prefix_and_suffix() {
let old = vec!["a".into(), "b".into(), "c".into(), "d".into()];
let new = vec!["a".into(), "x".into(), "d".into()];
let result = diff(&old, &new);
assert_debug_snapshot!(result);
}
// Inputs of equal length that differ at two separate positions.
#[test]
fn test_complex_diff() {
let old = vec!["a".into(), "b".into(), "c".into(), "d".into()];
let new = vec!["a".into(), "x".into(), "c".into(), "y".into()];
let result = diff(&old, &new);
assert_debug_snapshot!(result);
}
}

View file

@ -1,64 +0,0 @@
use crate::tokenizer::token::Token;
/// A diff operation over a run of tokens, as produced by the diff algorithm
/// before it is turned into an index-based operation.
#[derive(Debug, Clone, PartialEq)]
pub enum RawOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Tokens taken from the new sequence.
Insert(Vec<Token<T>>),
/// Tokens taken from the old sequence that are removed.
Delete(Vec<Token<T>>),
/// Tokens that are common to both sequences.
Equal(Vec<Token<T>>),
}
impl<T> RawOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn tokens(&self) -> &Vec<Token<T>> {
match self {
RawOperation::Insert(tokens)
| RawOperation::Delete(tokens)
| RawOperation::Equal(tokens) => tokens,
}
}
pub fn original_text_length(&self) -> usize {
self.tokens().iter().map(Token::get_original_length).sum()
}
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
pub fn is_left_joinable(&self) -> bool {
let first_token = self.tokens().first();
first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
}
pub fn is_right_joinable(&self) -> bool {
let last_token = self.tokens().last();
last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
}
/// Extends the operation with another operation. Only operations of the
/// same type as self can be used to extend self, otherwise the function
/// will panic.
pub fn extend(self, other: RawOperation<T>) -> RawOperation<T> {
debug_assert!(
std::mem::discriminant(&self) == std::mem::discriminant(&other),
"Cannot extend operations of different types. This should have been handled before \
calling this function."
);
match (self, other) {
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
}
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
}
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => {
RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect())
}
_ => unreachable!("Only operations of the same type can be extended"),
}
}
}

View file

@ -1,67 +0,0 @@
---
source: reconcile/src/diffs/myers.rs
expression: result
snapshot_kind: text
---
[
Equal(
[
Token {
normalised: "a",
original: "a",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Insert(
[
Token {
normalised: "x",
original: "x",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Delete(
[
Token {
normalised: "b",
original: "b",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Equal(
[
Token {
normalised: "c",
original: "c",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Insert(
[
Token {
normalised: "y",
original: "y",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Delete(
[
Token {
normalised: "d",
original: "d",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
]

View file

@ -1,27 +0,0 @@
---
source: reconcile/src/diffs/myers.rs
expression: result
snapshot_kind: text
---
[
Delete(
[
Token {
normalised: "a",
original: "a",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Delete(
[
Token {
normalised: "b",
original: "b",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
]

View file

@ -1,37 +0,0 @@
---
source: reconcile/src/diffs/myers.rs
expression: result
snapshot_kind: text
---
[
Equal(
[
Token {
normalised: "a",
original: "a",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Equal(
[
Token {
normalised: "b",
original: "b",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Equal(
[
Token {
normalised: "c",
original: "c",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
]

View file

@ -1,27 +0,0 @@
---
source: reconcile/src/diffs/myers.rs
expression: result
snapshot_kind: text
---
[
Insert(
[
Token {
normalised: "a",
original: "a",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Insert(
[
Token {
normalised: "b",
original: "b",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
]

View file

@ -1,57 +0,0 @@
---
source: reconcile/src/diffs/myers.rs
expression: result
snapshot_kind: text
---
[
Equal(
[
Token {
normalised: "a",
original: "a",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Delete(
[
Token {
normalised: "b",
original: "b",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Delete(
[
Token {
normalised: "c",
original: "c",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Insert(
[
Token {
normalised: "x",
original: "x",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
Equal(
[
Token {
normalised: "d",
original: "d",
is_left_joinable: true,
is_right_joinable: true,
},
],
),
]

View file

@ -1,10 +0,0 @@
mod diffs;
mod operation_transformation;
mod tokenizer;
mod utils;
pub use operation_transformation::{
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
reconcile_with_tokenizer,
};
pub use tokenizer::{Tokenizer, token::Token};

View file

@ -1,166 +0,0 @@
mod cursor;
mod edited_text;
mod merge_context;
mod operation;
mod ordered_operation;
pub use cursor::{CursorPosition, TextWithCursors};
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::Tokenizer;
/// Merges two concurrently edited versions (`left` and `right`) of
/// `original` and returns the reconciled text.
///
/// This is `reconcile_with_cursors` without any cursors; only the merged
/// text is returned.
#[must_use]
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
    // The merged text is built with `TextWithCursors::new_owned`, so it is an
    // owned `Cow`: take the `String` out instead of copying it via `to_string`.
    reconcile_with_cursors(original, left.into(), right.into())
        .text
        .into_owned()
}
/// Merges two concurrently edited versions of `original`, each of which may
/// carry cursors, and returns the reconciled text together with the merged
/// cursors.
///
/// Both sides are diffed against `original` with `EditedText::from_strings`,
/// the two edit sequences are merged, and the result is applied.
#[must_use]
pub fn reconcile_with_cursors<'a>(
    original: &'a str,
    left: TextWithCursors<'a>,
    right: TextWithCursors<'a>,
) -> TextWithCursors<'static> {
    let from_left = EditedText::from_strings(original, left);
    let from_right = EditedText::from_strings(original, right);

    let merged = from_left.merge(from_right);
    let text = merged.apply();
    TextWithCursors::new_owned(text, merged.cursors)
}
/// Like `reconcile_with_cursors`, but both sides are tokenized with the
/// caller-supplied `tokenizer`.
///
/// NOTE(review): the type parameter `F` is used neither by the arguments nor
/// by the body, so it cannot be inferred and callers have to name it
/// explicitly. It is kept so that existing call sites keep compiling;
/// consider dropping it in a breaking release.
#[must_use]
pub fn reconcile_with_tokenizer<'a, F, T>(
    original: &str,
    left: TextWithCursors<'a>,
    right: TextWithCursors<'a>,
    tokenizer: &Tokenizer<T>,
) -> TextWithCursors<'static>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    let from_left = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
    let from_right = EditedText::from_strings_with_tokenizer(original, right, tokenizer);

    let merged = from_left.merge(from_right);
    let text = merged.apply();
    TextWithCursors::new_owned(text, merged.cursors)
}
#[cfg(test)]
mod test {
    use std::{fs, ops::Range, path::Path};

    use pretty_assertions::assert_eq;
    use test_case::test_matrix;

    use super::*;
    use crate::CursorPosition;

    /// Merges two edits that both carry cursors and checks the merged text as
    /// well as the position of every cursor.
    #[test]
    fn test_cursor_complex() {
        let original = "this is some complex text to test cursor positions";
        let left = TextWithCursors::new(
            "this is really complex text for testing cursor positions",
            vec![
                CursorPosition {
                    id: 0,
                    char_index: 8,
                }, // after "this is "
                CursorPosition {
                    id: 1,
                    char_index: 22,
                }, // after "this is really complex text"
            ],
        );
        let right = TextWithCursors::new(
            "that was some complex sample to test cursor movements",
            vec![
                CursorPosition {
                    id: 2,
                    char_index: 5,
                }, // after "that "
                CursorPosition {
                    id: 3,
                    char_index: 29,
                }, // after "some complex sample "
            ],
        );
        let merged = reconcile_with_cursors(original, left, right);
        assert_eq!(
            merged,
            TextWithCursors::new(
                "that was really complex sample for testing cursor movements",
                vec![
                    CursorPosition {
                        id: 2,
                        char_index: 5
                    }, // unchanged
                    CursorPosition {
                        id: 0,
                        char_index: 9
                    }, // before "really"
                    CursorPosition {
                        id: 1,
                        char_index: 23
                    }, // inside of "s|ample" because "text" got replaced by "sample"
                    CursorPosition {
                        id: 3,
                        char_index: 43
                    }, // before "cursor movements"
                ]
            )
        );
    }

    /// Merges character windows of every combination of the resource files
    /// and only checks that `reconcile` does not panic.
    #[ignore = "expensive to run, only run in CI"]
    #[test_matrix( [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])]
    fn test_merge_files_without_panic(
        file_name_1: &str,
        file_name_2: &str,
        file_name_3: &str,
        range_1: Range<usize>,
        range_2: Range<usize>,
        range_3: Range<usize>,
    ) {
        let files = [file_name_1, file_name_2, file_name_3];
        let ranges = [range_1, range_2, range_3];
        let root = Path::new("tests/resources/");
        let contents = files
            .iter()
            .zip(ranges.iter())
            .map(|(file, range)| {
                let path = root.join(file);
                fs::read_to_string(&path)
                    .unwrap()
                    .chars()
                    .skip(range.start)
                    // Take the length of the window, not its end index, so that
                    // `10000..20000` yields characters 10000 to 20000.
                    .take(range.len())
                    .collect::<String>()
            })
            .collect::<Vec<_>>();
        let _ = reconcile(&contents[0], &contents[1], &contents[2]);
    }
}

View file

@ -1,57 +0,0 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// The position of an identifiable cursor in a text document, based on its
/// (UTF-8) character index.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
    pub id: usize,
    pub char_index: usize,
}

impl CursorPosition {
    /// Returns a cursor with the same `id` placed at `index`; `self` is left
    /// untouched.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        let mut moved = self.clone();
        moved.char_index = index;
        moved
    }
}
/// A text, borrowed or owned, together with the cursors placed in it.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
    pub text: Cow<'a, str>,
    pub cursors: Vec<CursorPosition>,
}

impl<'a> TextWithCursors<'a> {
    /// Wraps a borrowed text and its cursors without copying the text.
    #[must_use]
    pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Borrowed(text);
        Self { text, cursors }
    }

    /// Wraps an owned text and its cursors, taking ownership of the text.
    #[must_use]
    pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
        let text = Cow::Owned(text);
        Self { text, cursors }
    }
}

/// A plain string slice converts into a text without cursors.
impl<'a> From<&'a str> for TextWithCursors<'a> {
    fn from(text: &'a str) -> Self {
        Self {
            text: Cow::Borrowed(text),
            cursors: Vec::new(),
        }
    }
}

View file

@ -1,381 +0,0 @@
use core::iter;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
use crate::{
diffs::{myers::diff, raw_operation::RawOperation},
operation_transformation::merge_context::MergeContext,
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
};
/// A sequence of operations that can be applied to a text document.
/// `EditedText` supports merging two sequences of operations using the
/// principle of Operational Transformation.
///
/// It's mainly created through the `from_strings` method, then merged with
/// another `EditedText` derived from the same original text and then applied to
/// the original text to get the reconciled text of concurrent edits.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// The text the operations are applied to by `apply`.
text: &'a str,
/// The operations, in the order in which they are applied.
operations: Vec<OrderedOperation<T>>,
/// The cursors that travel with the text; `new` sorts them by `char_index`.
pub(crate) cursors: Vec<CursorPosition>,
}
// Convenience constructor for the default `String`-token case.
impl<'a> EditedText<'a, String> {
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The default
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
///
/// The cursors of `updated` are carried over to the returned value. This is
/// a thin wrapper around `from_strings_with_tokenizer` with `word_tokenizer`.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
}
}
impl<'a, T> EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The tokenizer
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
Self::new(
original,
Self::cook_operations(Self::elongate_operations(diff)).collect(),
updated.cursors,
)
}
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
{
// This might look bad, but this makes sense. The inserts and deltes can be
// interleaved, such as: IDIDID and we need to turn this into IIIDDD.
// So we need to keep track of both the last insert and delete operations, not
// just the last one.
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
let mut result: Vec<RawOperation<T>> = raw_operations
.into_iter()
.flat_map(|next| match next {
RawOperation::Insert(..) => match maybe_previous_insert.take() {
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
maybe_previous_insert = Some(prev.extend(next));
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
prev => {
maybe_previous_insert = Some(next);
Box::new(prev.into_iter())
}
},
RawOperation::Delete(..) => match maybe_previous_delete.take() {
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
maybe_previous_delete = Some(prev.extend(next));
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
prev => {
maybe_previous_delete = Some(next);
Box::new(prev.into_iter())
}
},
RawOperation::Equal(..) => Box::new(
maybe_previous_insert
.take()
.into_iter()
.chain(maybe_previous_delete.take())
.chain(iter::once(next)),
)
as Box<dyn Iterator<Item = RawOperation<T>>>,
})
.collect();
if let Some(prev) = maybe_previous_insert {
result.push(prev);
}
if let Some(prev) = maybe_previous_delete {
result.push(prev);
}
result
}
// Converts raw operations into ordered operations while tracking the running
// index in the new text and the running index in the original text.
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
where
    I: IntoIterator<Item = RawOperation<T>>,
{
    // Start index of the current operation on the new text.
    let mut new_index = 0;
    // Start index of the current operation on the original text.
    let mut order = 0;

    raw_operations.into_iter().filter_map(move |raw_operation| {
        let length = raw_operation.original_text_length();

        // Each arm yields the operation plus how far it moves the two indexes.
        let (operation, new_advance, order_advance) = match raw_operation {
            RawOperation::Equal(..) => {
                let operation = if cfg!(debug_assertions) {
                    Operation::create_equal_with_text(
                        new_index,
                        raw_operation.get_original_text(),
                    )
                } else {
                    Operation::create_equal(new_index, length)
                };
                (operation, length, length)
            }
            RawOperation::Insert(tokens) => {
                (Operation::create_insert(new_index, tokens), length, 0)
            }
            RawOperation::Delete(..) => {
                let operation = if cfg!(debug_assertions) {
                    Operation::create_delete_with_text(
                        new_index,
                        raw_operation.get_original_text(),
                    )
                } else {
                    Operation::create_delete(new_index, length)
                };
                (operation, 0, length)
            }
        };

        // The operation is tagged with `order` as it was before advancing.
        let ordered = operation.map(|operation| OrderedOperation { order, operation });
        new_index += new_advance;
        order += order_advance;
        ordered
    })
}
/// Assembles an `EditedText` from its parts.
///
/// `operations` must already be in the order in which they are applied and
/// must not overlap; debug builds assert that start indexes never decrease.
/// `cursors` are sorted by `char_index` before being stored.
fn new(
    text: &'a str,
    operations: Vec<OrderedOperation<T>>,
    mut cursors: Vec<CursorPosition>,
) -> Self {
    for pair in operations.windows(2) {
        let (previous, next) = (&pair[0], &pair[1]);
        debug_assert!(
            previous.operation.start_index() <= next.operation.start_index(),
            "{} must not come before {} yet it does",
            previous.operation,
            next.operation
        );
    }

    cursors.sort_by_key(|cursor| cursor.char_index);

    Self {
        text,
        operations,
        cursors,
    }
}
/// Merges the operations and cursors of `other` into those of `self` and
/// returns the combined `EditedText`.
///
/// Both values must be derived from the same text (checked with a
/// `debug_assert_eq!`). `self` is treated as the left side and `other` as the
/// right side.
#[must_use]
pub fn merge(self, other: Self) -> Self {
debug_assert_eq!(
self.text, other.text,
"`EditedText`-s must be derived from the same text to be mergable"
);
// One merge context per side; each operation is merged with both of them.
let mut left_merge_context = MergeContext::default();
let mut right_merge_context = MergeContext::default();
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
// Cursors are consumed front to back as the operations are processed.
let mut left_cursors = self.cursors.into_iter().peekable();
let mut right_cursors = other.cursors.into_iter().peekable();
let merged_operations: Vec<OrderedOperation<T>> = self
.operations
.into_iter()
// The current text is always the left; the other operation is the right side.
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| {
// Sort key: order first, then start index, then a content-derived
// tie-breaker.
(
operation.order,
operation.operation.start_index(),
// Make sure that the ordering is deterministic regardless which text
// is left or right.
match &operation.operation {
Operation::Equal { index, .. } => index.to_string(),
Operation::Insert { text, .. } => text
.iter()
.map(crate::tokenizer::token::Token::original)
.collect::<String>(),
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
)
},
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
// Remember where the operation was before merging so that the cursors
// can be moved by the same amount.
let original_start = operation.start_index() as i64;
let original_end = operation.end_index();
let original_length = operation.len() as i64;
// The two contexts are passed in opposite order depending on the side
// the operation comes from.
let result = match side {
Side::Left => operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
),
Side::Right => operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
),
};
// Cursors are only moved along with inserts and equals.
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result
{
// Change of the start index plus change of the length.
let shift = op.start_index() as i64 - original_start + op.len() as i64
- original_length;
match side {
Side::Left => {
// Take every pending cursor of this side that lies at most one
// past the original end of the operation; it is shifted but
// never placed before the start of the merged operation.
while let Some(cursor) =
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
{
merged_cursors.push(cursor.with_index(
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
as usize,
));
}
}
Side::Right => {
// Same as above for the cursors of the right side.
while let Some(cursor) = right_cursors
.next_if(|cursor| cursor.char_index <= original_end + 1)
{
merged_cursors.push(cursor.with_index(
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
as usize,
));
}
}
}
}
// The merged operation, if any, keeps the order of the operation it
// was derived from.
result
.map(|operation| OrderedOperation { order, operation })
.into_iter()
})
.collect();
// End index of the last insert or equal operation, or 0 if there is none.
let last_index = merged_operations
.iter()
.filter(|operation| {
matches!(
operation.operation,
Operation::Insert { .. } | Operation::Equal { .. }
)
})
.next_back()
.map_or(0, |op| op.operation.end_index());
// Cursors that were not consumed above are placed at `last_index`.
for cursor in left_cursors.chain(right_cursors) {
merged_cursors.push(cursor.with_index(last_index));
}
Self::new(self.text, merged_operations, merged_cursors)
}
/// Apply the operations to the text and return the resulting text.
#[must_use]
pub fn apply(&self) -> String {
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for OrderedOperation { operation, .. } in &self.operations {
builder = operation.apply(builder);
}
builder.build()
}
}
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;
    use pretty_assertions::assert_eq;

    use super::*;

    /// The operations derived from two texts are snapshot-tested, and
    /// applying them must reproduce the updated text.
    #[test]
    fn test_calculate_operations() {
        let left = "hello world! How are you? Adam";
        let right = "Hello, my friend! How are you doing? Albert";
        let operations = EditedText::from_strings(left, right.into());
        assert_debug_snapshot!(operations);
        let new_right = operations.apply();
        assert_eq!(new_right.to_string(), right);
    }

    /// Identical texts: applying the operations must give back the text.
    #[test]
    fn test_calculate_operations_with_no_diff() {
        let text = "hello world!";
        let operations = EditedText::from_strings(text, text.into());
        assert_debug_snapshot!(operations);
        let new_right = operations.apply();
        assert_eq!(new_right.to_string(), text);
    }

    /// Two edits of the same original that both append text are merged so
    /// that both additions end up in the result.
    #[test]
    fn test_calculate_operations_with_insert() {
        let original = "hello world! ...";
        let left = "Hello world! I'm Andras.";
        let right = "Hello world! How are you?";
        let expected = "Hello world! How are you? I'm Andras.";
        let operations_1 = EditedText::from_strings(original, left.into());
        let operations_2 = EditedText::from_strings(original, right.into());
        let operations = operations_1.merge(operations_2);
        assert_eq!(operations.apply(), expected);
    }
}

View file

@ -1,73 +0,0 @@
use core::fmt::Debug;
use crate::operation_transformation::Operation;
/// Bookkeeping kept for one side of a merge while operations are processed.
///
/// It remembers the last operation recorded for that side and a running
/// index shift.
#[derive(Clone, Debug)]
pub struct MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// The most recently recorded operation, if any. Only reachable through
/// the accessor and the replace/consume methods.
last_operation: Option<Operation<T>>,
/// Signed offset, in characters. `Operation::merge_operations_with_context`
/// adds the affecting context's `shift` to the start index of the
/// operation being merged.
pub shift: i64,
}
impl<T> Default for MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn default() -> Self {
MergeContext {
last_operation: None,
shift: 0,
}
}
}
impl<T> MergeContext<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Borrows the most recently recorded operation, if there is one.
    pub fn last_operation(&self) -> Option<&Operation<T>> {
        self.last_operation.as_ref()
    }

    /// Overwrites the recorded operation without touching `shift`.
    pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
        self.last_operation = operation;
    }

    /// Records `operation` as the new last operation. If the operation being
    /// replaced was a delete, its character count is subtracted from `shift`
    /// first.
    pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
        let previous = std::mem::replace(&mut self.last_operation, operation);

        if let Some(Operation::Delete {
            deleted_character_count,
            ..
        }) = previous
        {
            self.shift -= deleted_character_count as i64;
        }
    }

    /// Drops the recorded operation when `threshold_index` (after applying
    /// `shift`) has moved past it.
    ///
    /// A dropped delete also subtracts its character count from `shift`; a
    /// dropped insert leaves `shift` unchanged. Equal operations are never
    /// dropped here.
    pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
        let is_too_behind = match self.last_operation.as_ref() {
            Some(delete @ Operation::Delete { .. }) => {
                threshold_index + self.shift > delete.end_index() as i64
            }
            Some(insert @ Operation::Insert { .. }) => {
                threshold_index + self.shift - insert.len() as i64 > insert.end_index() as i64
            }
            Some(Operation::Equal { .. }) | None => false,
        };

        if is_too_behind {
            // Clearing through the shared helper applies the shift
            // adjustment for deletes and none for inserts.
            self.consume_and_replace_last_operation(None);
        }
    }
}

View file

@ -1,513 +0,0 @@
use core::fmt::{Debug, Display};
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::{
Token,
utils::{
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
string_builder::StringBuilder,
},
};
/// Represents a change that can be applied on a `StringBuilder`.
///
/// Lengths are counted in characters (`chars().count()`), not bytes.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq)]
pub enum Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// A span of `length` characters starting at `index` that is kept as is.
Equal {
index: usize,
length: usize,
/// Debug builds only: the text expected in the span. When present,
/// `apply` asserts that the builder content matches it.
#[cfg(debug_assertions)]
text: Option<String>,
},
/// Inserts the original text of the given tokens at `index`.
Insert {
index: usize,
text: Vec<Token<T>>,
},
/// Deletes `deleted_character_count` characters starting at `index`.
Delete {
index: usize,
deleted_character_count: usize,
/// Debug builds only: the text expected to be deleted. When present,
/// `apply` asserts that the builder content matches it.
#[cfg(debug_assertions)]
deleted_text: Option<String>,
},
}
impl<T> Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Builds an `Equal` operation covering `length` characters from `index`,
/// marking that span as unchanged.
///
/// Returns `None` when `length` is zero, since that would be a no-op.
pub fn create_equal(index: usize, length: usize) -> Option<Self> {
    (length > 0).then_some(Operation::Equal {
        index,
        length,
        #[cfg(debug_assertions)]
        text: None,
    })
}
/// Builds an `Equal` operation at `index` spanning as many characters as
/// `text` has. Debug builds also store `text` so `apply` can verify it.
///
/// Returns `None` when `text` is empty.
pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
    let length = text.chars().count();

    (length > 0).then(|| Operation::Equal {
        index,
        length,
        #[cfg(debug_assertions)]
        text: Some(text),
    })
}
/// Builds an `Insert` operation placing `text` at `index`.
///
/// Returns `None` when `text` contains no tokens, since that would be a
/// no-op.
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
    match text.is_empty() {
        true => None,
        false => Some(Operation::Insert { index, text }),
    }
}
/// Builds a `Delete` operation removing `deleted_character_count`
/// characters starting at `index`.
///
/// Returns `None` when nothing would be deleted, since that would be a
/// no-op.
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
    (deleted_character_count > 0).then_some(Operation::Delete {
        index,
        deleted_character_count,
        #[cfg(debug_assertions)]
        deleted_text: None,
    })
}
/// Builds a `Delete` operation at `index` removing as many characters as
/// `text` has. Debug builds also store `text` so `apply` can verify it.
///
/// Returns `None` when `text` is empty.
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
    let deleted_character_count = text.chars().count();

    (deleted_character_count > 0).then(|| Operation::Delete {
        index,
        deleted_character_count,
        #[cfg(debug_assertions)]
        deleted_text: Some(text),
    })
}
/// Applies the operation to `builder` and hands the builder back.
///
/// `Equal` leaves the builder untouched, `Insert` inserts the concatenated
/// original text of its tokens at the start index, and `Delete` removes the
/// operation's range.
///
/// # Panics
///
/// In debug builds, panics when an `Equal` or `Delete` operation carries its
/// expected text and the builder content in the operation's range differs.
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
    match self {
        Operation::Equal {
            #[cfg(debug_assertions)]
            text,
            ..
        } => {
            #[cfg(debug_assertions)]
            debug_assert!(
                text.as_ref()
                    .is_none_or(|expected| builder.get_slice(self.range()) == *expected),
                "Text which is supposed to be equal does not match the text in the range"
            );
        }
        Operation::Insert { text, .. } => {
            let inserted: String = text.iter().map(Token::original).collect();
            builder.insert(self.start_index(), &inserted);
        }
        Operation::Delete {
            #[cfg(debug_assertions)]
            deleted_text,
            ..
        } => {
            #[cfg(debug_assertions)]
            debug_assert!(
                deleted_text
                    .as_ref()
                    .is_none_or(|expected| builder.get_slice(self.range()) == *expected),
                "Text to delete does not match the text in the range"
            );
            builder.delete(self.range());
        }
    }

    builder
}
/// Returns the index of the first character the operation affects.
pub fn start_index(&self) -> usize {
    // Every variant carries an `index` field, so the pattern is irrefutable.
    let (Operation::Equal { index, .. }
    | Operation::Insert { index, .. }
    | Operation::Delete { index, .. }) = self;

    *index
}
/// Returns the index of the last character the operation affects, i.e.
/// `start_index() + len() - 1`.
///
/// # Panics
///
/// In debug builds, panics if the operation is empty.
pub fn end_index(&self) -> usize {
    let length = self.len();
    debug_assert!(
        length > 0,
        " len() must be greater than 0 because operations must be non-empty"
    );

    self.start_index() + length - 1
}
/// Returns the half-open range of character indices the operation affects,
/// from `start_index()` up to and including `end_index()`.
pub fn range(&self) -> Range<usize> {
    Range {
        start: self.start_index(),
        end: self.end_index() + 1,
    }
}
/// Returns how many characters the operation affects: the span length for
/// `Equal`, the summed original length of the tokens for `Insert`, and the
/// deleted character count for `Delete`.
///
/// The `create_*` constructors never produce an empty operation.
pub fn len(&self) -> usize {
    match self {
        Operation::Insert { text, .. } => text
            .iter()
            .map(|token| token.get_original_length())
            .sum(),
        Operation::Equal { length: count, .. }
        | Operation::Delete {
            deleted_character_count: count,
            ..
        } => *count,
    }
}
/// Returns the same operation (same variant and payload) relocated so that
/// it starts at `index`.
pub fn with_index(mut self, index: usize) -> Self {
    // Every variant carries an `index` field, so the pattern is irrefutable.
    let (Operation::Equal { index: current, .. }
    | Operation::Insert { index: current, .. }
    | Operation::Delete { index: current, .. }) = &mut self;
    *current = index;

    self
}
/// Returns the same operation moved by `offset` characters. `offset` may be
/// negative as long as the resulting start index is not.
///
/// # Panics
///
/// In debug mode, panics if the resulting index is negative.
pub fn with_shifted_index(self, offset: i64) -> Self {
    let shifted = offset + self.start_index() as i64;
    debug_assert!(shifted >= 0, "Shifted index must be non-negative");

    self.with_index(shifted as usize)
}
/// Merges the operation with the given contexts, producing a new operation
/// (or `None` when nothing is left of it) and updating both contexts
/// in-place. This implements a complex state machine keyed on the kind of
/// this operation and the kind of the last operation recorded in
/// `affecting_context`.
///
/// `affecting_context` supplies the shift applied to this operation and the
/// last operation it is compared against; `produced_context` is where the
/// outcome of this call is recorded.
///
/// # Panics
///
/// In debug builds, panics if `affecting_context` holds a delete that does
/// not contain this operation's (shifted) start index.
#[allow(clippy::too_many_lines)]
pub fn merge_operations_with_context(
self,
affecting_context: &mut MergeContext<T>,
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
// Forget the affecting side's last operation if this one starts past it,
// then move this operation by the affecting side's accumulated shift.
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation()) {
// Insert with nothing conflicting before it: emit unchanged and account
// for its length in the produced shift.
(operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
produced_context.shift += operation.len() as i64;
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
// Insert following an insert recorded in the affecting context.
(
Operation::Insert { text, index },
Some(Operation::Insert {
text: previous_inserted_text,
..
}),
) => {
// In case the current insert's prefix appears in the previously inserted text,
// we can trim the current insert to only include the non-overlapping part.
// This way, we don't end up duplicating text.
let offset_in_tokens =
find_longest_prefix_contained_within(previous_inserted_text, &text);
let offset_in_length = text
.iter()
.take(offset_in_tokens)
.map(Token::get_original_length)
.sum::<usize>();
// `None` when the whole insert was already contained in the previous one.
let trimmed_operation =
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
affecting_context.shift -= offset_in_length as i64;
produced_context.shift += trimmed_operation
.as_ref()
.map(Operation::len)
.unwrap_or_default() as i64;
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
trimmed_operation
}
// Delete with no delete recorded in the affecting context: emit unchanged.
(
operation @ Operation::Delete { .. },
None | Some(Operation::Insert { .. } | Operation::Equal { .. }),
) => {
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
// Insert that starts inside a delete recorded in the affecting context:
// the insert is moved to the start of that delete.
(
operation @ Operation::Insert { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
produced_context.shift += operation.len() as i64;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
// How far into the delete the insert originally started.
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
// The affecting context's delete is replaced by one that starts right
// after the moved insert and is shortened by `difference`.
affecting_context.replace_last_operation(Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
Some(moved_operation)
}
// Delete that starts inside a delete recorded in the affecting context.
(
operation @ Operation::Delete { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
// Only the part of this delete reaching beyond the recorded delete is
// emitted (`None` if it does not reach beyond it).
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
);
// The affecting context keeps the part of its delete that reaches
// beyond this one (`None` if there is no such part).
affecting_context.replace_last_operation(Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(updated_delete.clone());
updated_delete
}
// Equal that starts inside a delete recorded in the affecting context:
// the overlapping part is dropped from the equal span. Neither context
// is updated in this arm.
(
ref operation @ Operation::Equal {
length,
#[cfg(debug_assertions)]
ref text,
..
},
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
// Number of characters of this equal span covered by the delete.
let overlap = (length as i64)
.min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
// Debug builds keep the expected text in sync by skipping the
// overlapping characters.
#[cfg(debug_assertions)]
let result = text.as_ref().map_or_else(
|| {
Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
(length as i64 - overlap) as usize,
)
},
|text| {
Operation::create_equal_with_text(
operation.end_index().min(last_delete.end_index()),
text.chars().skip(overlap as usize).collect::<String>(),
)
},
);
#[cfg(not(debug_assertions))]
let result = Operation::create_equal(
operation.end_index().min(last_delete.end_index()),
(length as i64 - overlap) as usize,
);
result
}
// Any other equal operation passes through with only the shift applied.
(operation @ Operation::Equal { .. }, _) => Some(operation),
}
}
}
/// Human-readable, single-line rendering of an operation, e.g.
/// `<insert 'text' from index 3>` or `<delete 5 characters from index 0>`.
///
/// Debug builds print the quoted text of `Equal`/`Delete` operations when it
/// was recorded. Otherwise (and always in release builds) the number of
/// affected characters is printed, using the same wording for both variants.
impl<T> Display for Operation<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Operation::Equal {
                index,
                length,
                #[cfg(debug_assertions)]
                text,
            } => {
                // The recorded text only exists in debug builds.
                #[cfg(debug_assertions)]
                let recorded_text = text.as_ref();
                #[cfg(not(debug_assertions))]
                let recorded_text: Option<&String> = None;

                match recorded_text {
                    Some(text) => write!(f, "<equal '{text}' from index {index}>"),
                    None => write!(f, "<equal {length} characters from index {index}>"),
                }
            }
            Operation::Insert { index, text } => {
                let inserted: String = text.iter().map(Token::original).collect();
                write!(f, "<insert '{inserted}' from index {index}>")
            }
            Operation::Delete {
                index,
                deleted_character_count,
                #[cfg(debug_assertions)]
                deleted_text,
            } => {
                // The recorded text only exists in debug builds.
                #[cfg(debug_assertions)]
                let recorded_text = deleted_text.as_ref();
                #[cfg(not(debug_assertions))]
                let recorded_text: Option<&String> = None;

                match recorded_text {
                    Some(text) => write!(f, "<delete '{text}' from index {index}>"),
                    None => write!(
                        f,
                        "<delete {deleted_character_count} characters from index {index}>"
                    ),
                }
            }
        }
    }
}
impl<T> Debug for Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
// Shifting an operation at index 1 by -2 would give a negative index, which
// must trip the debug assertion in `with_shifted_index`.
#[test]
#[should_panic(expected = "Shifted index must be non-negative")]
fn test_shifting_error() {
insta::assert_debug_snapshot!(
Operation::create_insert(1, vec!["hi".into()])
.unwrap()
.with_shifted_index(-2)
);
}
// Deleting " world" (6 characters from index 5) leaves "hello".
#[test]
fn test_apply_delete_with_create() {
let builder = StringBuilder::new("hello world");
let operation = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();
assert_eq!(operation.apply(builder).build(), "hello");
}
// Inserting at the end of the original appends the text.
#[test]
fn test_apply_insert() {
let builder = StringBuilder::new("hello");
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
assert_eq!(operation.apply(builder).build(), "hello my friend");
}
}

View file

@ -1,14 +0,0 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::operation_transformation::Operation;
/// An `Operation` paired with an ordering key.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Ordering key of the operation.
// NOTE(review): presumably a position in the original text used to
// interleave the operations of both sides during a merge — confirm against
// `EditedText`.
pub order: usize,
/// The operation itself.
pub operation: Operation<T>,
}

View file

@ -1,43 +0,0 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: <insert 'Hello, my friend!' from index 0>,
},
OrderedOperation {
order: 0,
operation: <delete 'hello world!' from index 17>,
},
OrderedOperation {
order: 12,
operation: <equal ' ' from index 17>,
},
OrderedOperation {
order: 13,
operation: <equal 'How' from index 18>,
},
OrderedOperation {
order: 16,
operation: <equal ' ' from index 21>,
},
OrderedOperation {
order: 17,
operation: <equal 'are' from index 22>,
},
OrderedOperation {
order: 20,
operation: <insert ' you doing? Albert' from index 25>,
},
OrderedOperation {
order: 20,
operation: <delete ' you? Adam' from index 43>,
},
],
cursors: [],
}

View file

@ -1,23 +0,0 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world!",
operations: [
OrderedOperation {
order: 0,
operation: <equal 'hello' from index 0>,
},
OrderedOperation {
order: 5,
operation: <equal ' ' from index 5>,
},
OrderedOperation {
order: 6,
operation: <equal 'world!' from index 6>,
},
],
cursors: [],
}

View file

@ -1,61 +0,0 @@
---
source: reconcile/src/operations/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -1,60 +0,0 @@
---
source: reconcile/src/operations/operation_sequence.rs
expression: operations
snapshot_kind: text
---
EditedText {
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -1,6 +0,0 @@
use token::Token;
pub mod token;
pub mod word_tokenizer;
/// A tokenizer: any function or closure that turns a string slice into a
/// list of `Token<T>`. This is an unsized `dyn` type, so it has to be used
/// behind a pointer such as `&Tokenizer<T>` or `Box<Tokenizer<T>>`.
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;

View file

@ -1,6 +0,0 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\"\")"
snapshot_kind: text
---
[]

View file

@ -1,25 +0,0 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\" what? \")"
snapshot_kind: text
---
[
Token {
normalised: " what?",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "what?",
original: "what?",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: " ",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
]

View file

@ -1,55 +0,0 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
snapshot_kind: text
---
[
Token {
normalised: " hello,",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "hello,",
original: "hello,",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: " \nwhere",
original: " \n",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "where",
original: "where",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: " are",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "are",
original: "are",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: " you?",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "you?",
original: "you?",
is_left_joinable: true,
is_right_joinable: true,
},
]

View file

@ -1,39 +0,0 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
snapshot_kind: text
---
[
Token {
normalised: " ",
original: " ",
},
Token {
normalised: "hello,",
original: "hello,",
},
Token {
normalised: " \n",
original: " \n",
},
Token {
normalised: "where",
original: "where",
},
Token {
normalised: " ",
original: " ",
},
Token {
normalised: "are",
original: "are",
},
Token {
normalised: " ",
original: " ",
},
Token {
normalised: "you?",
original: "you?",
},
]

View file

@ -1,25 +0,0 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\"Hi there!\")"
snapshot_kind: text
---
[
Token {
normalised: "Hi",
original: "Hi",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: " there!",
original: " ",
is_left_joinable: true,
is_right_joinable: true,
},
Token {
normalised: "there!",
original: "there!",
is_left_joinable: true,
is_right_joinable: true,
},
]

View file

@ -1,64 +0,0 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A piece of text together with the key that stands for it while diffing.
///
/// Two tokens compare equal when their `normalised` forms are equal; the
/// `original` text is what gets used when applying `Operation`-s.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// The normalised form of the token, used for deriving the diff.
    pub normalised: T,

    /// The original string that should be inserted into or deleted from the
    /// document.
    original: String,

    /// Whether the token is joinable with the previous token.
    is_left_joinable: bool,

    /// Whether the token is joinable with the next token.
    is_right_joinable: bool,
}

impl From<&str> for Token<String> {
    /// Builds a token whose normalised and original forms are both `text`
    /// and which is joinable on both sides.
    fn from(text: &str) -> Self {
        let original = text.to_owned();
        Self::new(original.clone(), original, true, true)
    }
}

impl<T> Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Creates a token from its normalised form, its original text and its
    /// joinability on each side.
    pub fn new(
        normalised: T,
        original: String,
        is_left_joinable: bool,
        is_right_joinable: bool,
    ) -> Self {
        Self {
            normalised,
            original,
            is_left_joinable,
            is_right_joinable,
        }
    }

    /// The original text of the token.
    pub fn original(&self) -> &str {
        self.original.as_str()
    }

    /// The normalised form used for comparisons.
    pub fn normalised(&self) -> &T {
        &self.normalised
    }

    /// Length of the original text in characters (not bytes).
    pub fn get_original_length(&self) -> usize {
        self.original().chars().count()
    }

    /// Whether the token is joinable with the previous token.
    pub fn get_is_left_joinable(&self) -> bool {
        self.is_left_joinable
    }

    /// Whether the token is joinable with the next token.
    pub fn get_is_right_joinable(&self) -> bool {
        self.is_right_joinable
    }
}

/// Tokens are compared by their normalised form only.
impl<T> PartialEq for Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    fn eq(&self, other: &Self) -> bool {
        self.normalised().eq(other.normalised())
    }
}

View file

@ -1,60 +0,0 @@
use super::token::Token;
/// Splits `text` into alternating runs of whitespace and non-whitespace
/// characters, one token per run.
///
/// The original text of every token is the run itself. The normalised form
/// of a whitespace token that is followed by another token is the whitespace
/// concatenated with the original text of that following token; all other
/// tokens are normalised to their original text.
///
/// ## Example
///
/// ```not_rust
/// "Hi there!" -> ["Hi", " ", "there!"]
/// ```
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
    let mut tokens: Vec<Token<String>> = Vec::new();
    let mut token_start = 0;
    let mut in_whitespace = text.chars().next().is_none_or(char::is_whitespace);

    // Cut a token every time the input flips between whitespace and
    // non-whitespace.
    for (byte_index, character) in text.char_indices() {
        let is_whitespace = character.is_whitespace();
        if is_whitespace != in_whitespace {
            tokens.push(text[token_start..byte_index].into());
            token_start = byte_index;
            in_whitespace = is_whitespace;
        }
    }

    // Whatever is left after the last boundary forms the final token.
    if token_start < text.len() {
        tokens.push(text[token_start..].into());
    }

    // Extend the normalised form of each whitespace token (except a trailing
    // one) with the original text of the token that follows it.
    for i in 0..tokens.len().saturating_sub(1) {
        if tokens[i].original().chars().all(char::is_whitespace) {
            let widened = format!("{}{}", tokens[i].normalised(), tokens[i + 1].original());
            tokens[i].normalised = widened;
        }
    }

    tokens
}
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use super::*;
// Snapshot the tokenization of a plain sentence, the empty string, a word
// surrounded by whitespace, and text with mixed whitespace and a newline.
#[test]
fn test_with_snapshots() {
assert_debug_snapshot!(word_tokenizer("Hi there!"));
assert_debug_snapshot!(word_tokenizer(""));
assert_debug_snapshot!(word_tokenizer(" what? "));
assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?"));
}
}

View file

@ -1,6 +0,0 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod merge_iters;
pub mod side;
pub mod string_builder;

View file

@ -1,47 +0,0 @@
use core::ops::{Index, Range};
/// Counts how many leading positions of `new_range` and `old_range` hold
/// equal elements in `new` and `old` respectively. Counting stops at the
/// first mismatch or when the shorter range is exhausted.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_prefix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut length = 0;
    for (new_index, old_index) in new_range.zip(old_range) {
        if new[new_index] == old[old_index] {
            length += 1;
        } else {
            break;
        }
    }
    length
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
// Covers empty inputs, a partial match, no match at all, and ranges that
// start in the middle of both inputs.
#[test]
fn test_common_prefix_len() {
assert_eq!(
common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
7
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
4
);
}
}

View file

@ -1,48 +0,0 @@
use core::ops::{Index, Range};
/// Counts how many trailing positions of `new_range` and `old_range` hold
/// equal elements in `new` and `old` respectively, walking both ranges from
/// their ends. Counting stops at the first mismatch or when the shorter
/// range is exhausted.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_suffix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut length = 0;
    for (new_index, old_index) in new_range.rev().zip(old_range.rev()) {
        if new[new_index] == old[old_index] {
            length += 1;
        } else {
            break;
        }
    }
    length
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
// Covers empty inputs, a full match of the shorter input, no match at all,
// and ranges that start in the middle of both inputs.
#[test]
fn test_common_suffix_len() {
assert_eq!(
common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
4
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
2
);
}
}

View file

@ -1,103 +0,0 @@
use crate::Token;
/// Returns the largest `length` such that the first `length` tokens of `new`
/// appear as a contiguous run somewhere in `old`, or 0 when not even the
/// first token of `new` occurs in `old`.
///
/// ## Example
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [9, 0, 2, 5, 1]
/// ```
/// > results in a length of 4
///
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 2]
/// ```
/// > results in a length of 2
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 4]
/// ```
/// > results in a length of 1
pub fn find_longest_prefix_contained_within<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    let longest_candidate = old.len().min(new.len());

    // Try the longest prefix first so the first hit is the answer.
    (1..=longest_candidate)
        .rev()
        .find(|&length| {
            let prefix = &new[..length];
            old.windows(length).any(|window| window == prefix)
        })
        .unwrap_or(0)
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_common_overlap() {
// A single (empty-string) token is found in an identical list.
assert_eq!(
find_longest_prefix_contained_within(&["".into()], &["".into()]),
1
);
// Only the first two tokens of `new` occur contiguously in `old`.
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into(), "c".into(), "a".into()]
),
2
);
// The whole of `new` is contained in `old`.
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into(), "c".into()]
),
2
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into()]
),
1
);
// The match does not have to be the first occurrence of the leading token.
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into(), "b".into(), "a".into()],
&["b".into(), "a".into()]
),
2
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "a".into(), "a".into()],
&["a".into(), "b".into(), "c".into()]
),
1
);
// The first token of `new` does not occur in `old` at all.
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["d".into(), "e".into(), "a".into()]
),
0
);
}
}

View file

@ -1,86 +0,0 @@
use core::{cmp::Ordering, iter::Peekable};
/// Iterator that interleaves two iterators by comparing the keys of their
/// next items. It yields the left item unless the left key compares strictly
/// greater than the right key.
pub struct MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    left: Peekable<L>,
    right: Peekable<R>,
    get_key: F,
}

impl<L, R, F, O> MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    /// Wraps both inputs so their next items can be peeked at.
    fn new(left: L, right: R, get_key: F) -> Self {
        Self {
            get_key,
            left: left.peekable(),
            right: right.peekable(),
        }
    }
}

impl<L, R, F, O> Iterator for MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    type Item = L::Item;

    /// Yields from the right side only when both sides have an item and the
    /// left key is strictly greater, or when the left side is exhausted.
    /// Equal and incomparable keys favour the left side.
    fn next(&mut self) -> Option<Self::Item> {
        let take_right = match (self.left.peek(), self.right.peek()) {
            (None, None) => return None,
            (Some(_), None) => false,
            (None, Some(_)) => true,
            (Some(l), Some(r)) => matches!(
                (self.get_key)(l).partial_cmp(&(self.get_key)(r)),
                Some(Ordering::Greater)
            ),
        };

        if take_right {
            self.right.next()
        } else {
            self.left.next()
        }
    }
}

/// Extension trait adding `merge_sorted_by_key` to every iterator.
pub trait MergeSorted: Iterator {
    /// Merges `self` with `other`, ordering items by the key that `get_key`
    /// extracts. Both inputs are expected to already be sorted by that key.
    fn merge_sorted_by_key<R, F, O>(self, other: R, get_key: F) -> MergeAscending<Self, R, F, O>
    where
        Self: Sized,
        R: Iterator<Item = Self::Item>,
        F: Fn(&Self::Item) -> O,
        O: PartialOrd,
    {
        MergeAscending::new(self, other, get_key)
    }
}

impl<T: ?Sized> MergeSorted for T where T: Iterator {}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
// Both inputs are sorted in descending order, so the key negates each value
// to make the keys ascend.
#[test]
fn test_merge_sorted_by_key() {
let left = [9, 7, 5, 3, 1];
let right = [7, 6, 5, 4, 3];
let result: Vec<i32> = left
.into_iter()
.merge_sorted_by_key(right.into_iter(), |x| -1 * x)
.collect();
assert_eq!(result, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]);
}
}

View file

@ -1,16 +0,0 @@
use std::fmt::Display;
/// Identifies one of the two sides taking part in a merge.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Side {
    Left,
    Right,
}

/// Prints the variant name: `Left` or `Right`.
impl Display for Side {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Side::Left => "Left",
            Side::Right => "Right",
        };
        f.write_str(name)
    }
}

View file

@ -1,111 +0,0 @@
use core::ops::Range;
/// A helper for building a string in order based on an original string and a
/// series of insertions and deletions applied to it. It is safe to use with
/// UTF-8 strings as all operations are based on character indices.
///
/// The builder keeps a byte cursor into the original and a cached character
/// count of the output, so each operation only walks the characters it
/// copies or skips rather than rescanning from the start.
#[derive(Debug, Clone)]
pub struct StringBuilder<'a> {
    original: &'a str,
    /// Byte offset into `original` of the first character that has been
    /// neither copied into `buffer` nor deleted. Always on a character
    /// boundary and never past the end of `original`.
    last_old_byte_index: usize,
    buffer: String,
    /// Number of characters (not bytes) currently held in `buffer`.
    buffer_char_count: usize,
}

impl StringBuilder<'_> {
    /// Creates a builder whose output starts out empty and whose source is
    /// `original`.
    pub fn new(original: &str) -> StringBuilder<'_> {
        StringBuilder {
            original,
            last_old_byte_index: 0,
            buffer: String::with_capacity(original.len()),
            buffer_char_count: 0,
        }
    }

    /// Insert a string at the given index after copying the original string up
    /// to that index from the last insertion or deletion.
    ///
    /// `from` is a character index into the text built so far.
    pub fn insert(&mut self, from: usize, text: &str) {
        self.copy_until(from);
        self.buffer.push_str(text);
        self.buffer_char_count += text.chars().count();
    }

    /// Delete a string at the given index after copying the original string up
    /// to that index from the last insertion or deletion.
    ///
    /// `range` is expressed in character indices of the text built so far;
    /// `range.len()` characters of the original are skipped (fewer if the
    /// original ends earlier).
    pub fn delete(&mut self, range: Range<usize>) {
        self.copy_until(range.start);
        self.last_old_byte_index += self.unconsumed_prefix_byte_len(range.len());
    }

    /// Copies characters from the original until the output is `index`
    /// characters long (or the original is exhausted).
    fn copy_until(&mut self, index: usize) {
        debug_assert!(
            index >= self.buffer_char_count,
            "String builder only support building in order"
        );

        let jump = index - self.buffer_char_count;
        let byte_len = self.unconsumed_prefix_byte_len(jump);

        // Copy the reference out so the slice does not keep `self` borrowed.
        let original = self.original;
        let copied = &original[self.last_old_byte_index..self.last_old_byte_index + byte_len];

        self.buffer.push_str(copied);
        // Count what was actually copied: it can be fewer than `jump`
        // characters when the original runs out.
        self.buffer_char_count += copied.chars().count();
        self.last_old_byte_index += byte_len;
    }

    /// The part of the original that has been neither copied nor deleted.
    fn unconsumed(&self) -> &str {
        &self.original[self.last_old_byte_index..]
    }

    /// Byte length of the first `char_count` characters of the unconsumed
    /// original, or of all of it when it has fewer characters than that.
    fn unconsumed_prefix_byte_len(&self, char_count: usize) -> usize {
        let unconsumed = self.unconsumed();
        unconsumed
            .char_indices()
            .nth(char_count)
            .map_or(unconsumed.len(), |(byte_index, _)| byte_index)
    }

    /// Finish building the string after copying the remaining original string
    /// since the last insertion or deletion.
    pub fn build(mut self) -> String {
        let original = self.original;
        self.buffer.push_str(&original[self.last_old_byte_index..]);
        self.buffer
    }

    /// Returns the characters in `range` of the text as it currently stands:
    /// everything built so far followed by the unconsumed rest of the
    /// original.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if the range reaches past the end of that text.
    // Only called from debug assertions elsewhere in the crate, so it is
    // unused in release builds.
    #[allow(dead_code)]
    pub fn get_slice(&self, range: Range<usize>) -> String {
        let result = self
            .buffer
            .chars()
            .chain(self.unconsumed().chars())
            .skip(range.start)
            .take(range.len())
            .collect::<String>();
        debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",);
        result
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Prepends text, drops the first word of the original and appends text
    /// after the remainder has been copied over.
    #[test]
    fn test_string_builder() {
        let mut sb = StringBuilder::new("aaa bbb ccc");

        sb.insert(0, "ddd ");
        sb.delete(4..8);
        sb.insert(11, " eee");

        let built = sb.build();
        assert_eq!(built, "ddd bbb ccc eee");
    }

    /// A single deletion in the middle keeps the text on both sides of it.
    #[test]
    fn test_string_builder2() {
        let mut sb = StringBuilder::new("abcde");

        sb.delete(1..4);

        let built = sb.build();
        assert_eq!(built, "ae");
    }
}

View file

@ -1,103 +0,0 @@
use pretty_assertions::assert_eq;
use reconcile::{CursorPosition, TextWithCursors};
use serde::Deserialize;
/// `ExampleDocument` represents a test case for the reconciliation process.
/// It contains a parent string, left and right strings with cursor positions,
/// and the expected result after reconciliation.
///
/// '|' characters in the left, right, and expected strings are treated as
/// cursor positions and are converted into `CursorPosition` objects.
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub struct ExampleDocument {
/// Parent text; handed out unchanged (no cursor parsing) by `parent()`.
parent: String,
/// Left text, with `|` marking cursor positions.
left: String,
/// Right text, with `|` marking cursor positions.
right: String,
/// Expected result, with `|` marking the expected cursor positions.
expected: String,
}
impl ExampleDocument {
    /// Returns a copy of the parent text.
    #[must_use]
    pub fn parent(&self) -> String { self.parent.clone() }

    /// Returns the left text with its `|` markers converted into cursors.
    #[must_use]
    pub fn left(&self) -> TextWithCursors<'static> {
        Self::string_to_text_with_cursors(&self.left)
    }

    /// Returns the right text with its `|` markers converted into cursors.
    #[must_use]
    pub fn right(&self) -> TextWithCursors<'static> {
        Self::string_to_text_with_cursors(&self.right)
    }

    /// Asserts that the result string matches the expected string,
    /// including cursor positions.
    ///
    /// # Panics
    ///
    /// If the result string does not match the expected string, the program
    /// will panic.
    pub fn assert_eq(&self, result: &TextWithCursors<'static>) {
        let result_str = Self::text_with_cursors_to_string(result);
        assert_eq!(
            self.expected, result_str,
            "Left (expected) isn't equal to right (actual). Actual: ```\n{result_str}```",
        );
    }

    /// Asserts that the result string matches the expected string,
    /// ignoring cursor positions.
    ///
    /// # Panics
    ///
    /// If the result string does not match the expected string, the program
    /// will panic.
    pub fn assert_eq_without_cursors(&self, result: &str) {
        let expected = Self::string_to_text_with_cursors(&self.expected).text;
        assert_eq!(
            expected, result,
            "Left (expected) isn't equal to right (actual), Actual: ```\n{result}```",
        );
    }

    /// Renders `text` back into the fixture notation by inserting a `|` at
    /// every cursor position.
    ///
    /// Cursor indices are character indices into the cursor-free text. The
    /// `+ i` offset accounts for the markers inserted for earlier cursors, so
    /// this assumes the cursors are ordered by ascending `char_index`.
    ///
    /// # Panics
    ///
    /// If a cursor points past the end of the text.
    fn text_with_cursors_to_string(text: &TextWithCursors<'_>) -> String {
        let mut result = text.text.clone().into_owned();
        // Bound the cursors by the number of characters of the cursor-free
        // text. `result.len()` would be a byte length that additionally grows
        // with every inserted marker, letting out-of-range cursors through.
        let char_count = result.chars().count();
        for (i, cursor) in text.cursors.iter().enumerate() {
            assert!(
                cursor.char_index <= char_count, // equals in case of insert at the end
                "Cursor index out of bounds: {} > {char_count} when testing for '{result}'",
                cursor.char_index,
            );
            // Translate the character index into the byte index expected by
            // `String::insert`; a cursor at the very end maps to `len()`.
            let byte_index = result
                .char_indices()
                .nth(cursor.char_index + i)
                .map_or(result.len(), |(byte_index, _)| byte_index);
            result.insert(byte_index, '|');
        }
        result
    }

    /// Splits fixture notation into the plain text (all `|` removed) and the
    /// cursors those markers stood for.
    fn string_to_text_with_cursors(text: &str) -> TextWithCursors<'static> {
        let cursors = Self::parse_cursors(text);
        let text = text.replace('|', "");
        TextWithCursors::new_owned(text, cursors)
    }

    /// Produces one cursor (always with `id` 0) per `|` in `text`. Each
    /// cursor's `char_index` refers to the text with all markers removed,
    /// hence the subtraction of the number of markers seen so far.
    fn parse_cursors(text: &str) -> Vec<CursorPosition> {
        text.chars()
            .enumerate()
            .filter(|&(_, c)| c == '|')
            .enumerate()
            .map(|(markers_before, (i, _))| CursorPosition {
                id: 0,
                char_index: i - markers_before,
            })
            .collect()
    }
}

View file

@ -1 +0,0 @@
The `|` characters denote cursor positions which are stripped before the actual reconcile logic is run

View file

@ -1,31 +0,0 @@
# Both delete the same range
parent: original_1 original_2 original_3 original_4 original_5
left: original_1 original_5|
right: "|original_1 original_5"
expected: "|original_1 original_5|"
---
# Both delete a range and one range contains the other
parent: original_1 original_2 original_3 original_4 original_5
left: original_1 original_5
right: original_1 original_4 original_5
expected: original_1 original_5
---
# Deleting overlapping ranges
parent: original_1 original_2 original_3 original_4 original_5
left: original_1 original_4| original_5
right: original_1 original_2| original_5
expected: original_1|| original_5
---
parent: long text with one big delete and many small
left: long small
right: long with big and small
expected: long small
---
parent: long text where the cursor has to be clamped after delete
left: long text where the cursor has to be clamped after delete|
right: long text where the cursor
expected: long text where the cursor|

View file

@ -1,12 +0,0 @@
# One deleted a large range, the other deleted subranges and inserted as well
parent: original_1 original_2 original_3 original_4 original_5
left: original_1 original_5
right: original_1 edit_1 original_3 edit_2 original_5
expected: original_1 edit_1 edit_2 original_5
---
# One deleted a large range, the other inserted and deleted a partially overlapping range
parent: original_1 original_2 original_3 original_4 original_5
left: original_1 original_5
right: original_1 edit_1 original_3 edit_2
expected: original_1 edit_1 edit_2

View file

@ -1,24 +0,0 @@
# Both inserted the same prefix; this should get deduplicated
parent: "hi "
left: "hi there "
right: "hi there my friend "
expected: "hi there my friend "
---
# The prefix of the 2nd appears on the 1st so it shouldn't get duplicated
parent: "hi "
left: "hi there you "
right: "hi there my friend "
expected: "hi there my friend you "
---
parent: a
left: a b c
right: a b c d
expected: a b c d
---
parent: a
left: abc
right: abcd
expected: abcabcd

View file

@ -1,63 +0,0 @@
parent: Hello!
left: |
Hello there!
How are you?
right: |
Hello there!
Best,
Andras
expected: |
Hello there!
Best,
Andras
How are you?
---
parent: |
- my list
- 2nd item
- 3rd item
left: |
- my list
- 2nd item
- nested list
- very nested list
- 3rd item
right: |
- my list
- nested list
- 2nd item
- 3rd item
- another nested list
expected: |
- my list
- nested list
- 2nd item
- nested list
- very nested list
- 3rd item
- another nested list
---
parent: |
a
a
left: |
a|
a
right: |
a|
a
expected: |
a||
a

View file

@ -1,19 +0,0 @@
# Both replaced one token but the tokens are different
parent: original_1 original_2 original_3
left: original_1 edit_1| original_3
right: original_1 original_2| edit_2
expected: original_1 edit_1|| edit_2
---
# Both replace the same token with the same value
parent: original_1 original_2 original_3
left: original_1 edit_1| original_3
right: original_1 edit_1 original_3|
expected: original_1 edit_1| original_3|
---
# Both replace the same token with different value
parent: original_1 original_2 original_3
left: original_1 edit_1| original_3
right: original_1 conflicting_edit_1| original_3
expected: original_1 conflicting_edit_1| edit_1| original_3

View file

@ -1,10 +0,0 @@
parent: Meeting at 2pm in 会议室
left: Meeting at |3pm in 会议室
right: Team meeting at 2pm in conference room|
expected: Team meeting at |3pm in conference room|
---
parent: " "
left: "it|s utf-8!"
right: " "
expected: "it|s utf-8!"

View file

@ -1,130 +0,0 @@
parent: You're Annual Savings Statement is available in our online portal
left: Your| annual record is available in our online portal|
right: You're Annual Savings information| is available online
expected: Your| annual record information| is available online|
---
parent: Party A shall pay Party B
left: Party C shall pay Party B
right: Party A shall receive from Party B
expected: Party C shall receive from Party B
---
parent:
left: hi my friend|
right: hi there|
expected: hi my friend| there|
---
parent: ""
left: ""
right: ""
expected: ""
---
parent: ""
left: "|"
right: "|"
expected: "||"
---
parent: Buy milk and eggs
left: Buy organic milk| and eggs|
right: Buy milk and eggs| and bread
expected: Buy organic milk| and eggs|| and bread
---
parent: Send the report to the team
left: Send the |detailed report to the |entire |team
right: Send the |quarterly |detailed report to the team
expected: Send the |detailed |quarterly |detailed report to the |entire |team
---
parent: Ready, Set go
left: Ready! Set go|
right: Ready, Set, go!|
expected: Ready! Set, go!||
---
parent: "Total: $100"
left: "Total: |$150"
right: "Total: |€100"
expected: "Total: |$150 |€100"
---
parent: Start middle end
left: Start [important] middle end|
right: Start middle [critical] end|
expected: Start [important] middle [critical] end||
---
parent: marketplace
left: market| place
right: market|space
expected: market| placemarket|space
---
parent: A B C D
left: A X B D|
right: A B Y|
expected: A X B |Y|
---
parent: Please submit your assignment by Friday
left: Please submit your |completed |assignment by Friday
right: Please submit your assignment |online |by Friday
expected: Please submit your |completed |assignment |online |by Friday
---
parent: "a b "
left: "c d "
right: "a b c d "
expected: "c d c d "
---
parent: a b c d e
left: a e|
right: a c e|
expected: a e||
---
parent: a 0 1 2 b
left: a 0 1| 2 b
right: a b|
expected: a| b|
---
parent: a 0 1 2 b
left: "|a b"
right: "|a E 1 F b"
expected: "||a E F b"
---
parent: a this one delete b
left: a b|
right: a my one change b|
expected: a my change b||
---
parent: this stays, this is one big delete, don't touch this
left: this stays, don't touch this|
right: this stays, my one change, don't touch this|
expected: this stays, my change, don't touch this||
---
parent: 1 2 3 4 5 6
left: 1| 6
right: 1 2 4|
expected: 1||
---
parent: hello world
left: hi, world
right: hello my friend!
expected: hi, my friend!
---
parent: a a
left: a
right: a
expected: a

View file

@ -1,742 +0,0 @@
# Reserved Strings
#
# Strings which may be used elsewhere in code
undefined
undef
null
NULL
(null)
nil
NIL
true
false
True
False
TRUE
FALSE
None
hasOwnProperty
then
constructor
\
\\
# Numeric Strings
#
# Strings which can be interpreted as numeric
0
1
1.00
$1.00
1/2
1E2
1E02
1E+02
-1
-1.00
-$1.00
-1/2
-1E2
-1E02
-1E+02
1/0
0/0
-2147483648/-1
-9223372036854775808/-1
-0
-0.0
+0
+0.0
0.00
0..0
.
0.0.0
0,00
0,,0
,
0,0,0
0.0/0
1.0/0.0
0.0/0.0
1,0/0,0
0,0/0,0
--1
-
-.
-,
999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
NaN
Infinity
-Infinity
INF
1#INF
-1#IND
1#QNAN
1#SNAN
1#IND
0x0
0xffffffff
0xffffffffffffffff
0xabad1dea
123456789012345678901234567890123456789
1,000.00
1 000.00
1'000.00
1,000,000.00
1 000 000.00
1'000'000.00
1.000,00
1 000,00
1'000,00
1.000.000,00
1 000 000,00
1'000'000,00
01000
08
09
2.2250738585072011e-308
# Special Characters
#
# ASCII punctuation. All of these characters may need to be escaped in some
# contexts. Divided into three groups based on (US-layout) keyboard position.
,./;'[]\-=
<>?:"{}|_+
!@#$%^&*()`~
# Non-whitespace C0 controls: U+0001 through U+0008, U+000E through U+001F,
# and U+007F (DEL)
# Often forbidden to appear in various text-based file formats (e.g. XML),
# or reused for internal delimiters on the theory that they should never
# appear in input.
# The next line may appear to be blank or mojibake in some viewers.

# Non-whitespace C1 controls: U+0080 through U+0084 and U+0086 through U+009F.
# Commonly misinterpreted as additional graphic characters.
# The next line may appear to be blank, mojibake, or dingbats in some viewers.
€‚ƒ„†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ
# Whitespace: all of the characters with category Zs, Zl, or Zp (in Unicode
# version 8.0.0), plus U+0009 (HT), U+000B (VT), U+000C (FF), U+0085 (NEL),
# and U+200B (ZERO WIDTH SPACE), which are in the C categories but are often
# treated as whitespace in some contexts.
# This file unfortunately cannot express strings containing
# U+0000, U+000A, or U+000D (NUL, LF, CR).
# The next line may appear to be blank or mojibake in some viewers.
# The next line may be flagged for "trailing whitespace" in some viewers.
…  
# Unicode additional control characters: all of the characters with
# general category Cf (in Unicode 8.0.0).
# The next line may appear to be blank or mojibake in some viewers.
­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪‫‬‭‮⁠⁡⁢⁣⁤⁦⁧⁨⁩𑂽𛲠𛲡𛲢𛲣𝅳𝅴𝅵𝅶𝅷𝅸𝅹𝅺󠀁󠀠󠀡󠀢󠀣󠀤󠀥󠀦󠀧󠀨󠀩󠀪󠀫󠀬󠀭󠀮󠀯󠀰󠀱󠀲󠀳󠀴󠀵󠀶󠀷󠀸󠀹󠀺󠀻󠀼󠀽󠀾󠀿󠁀󠁁󠁂󠁃󠁄󠁅󠁆󠁇󠁈󠁉󠁊󠁋󠁌󠁍󠁎󠁏󠁐󠁑󠁒󠁓󠁔󠁕󠁖󠁗󠁘󠁙󠁚󠁛󠁜󠁝󠁞󠁟󠁠󠁡󠁢󠁣󠁤󠁥󠁦󠁧󠁨󠁩󠁪󠁫󠁬󠁭󠁮󠁯󠁰󠁱󠁲󠁳󠁴󠁵󠁶󠁷󠁸󠁹󠁺󠁻󠁼󠁽󠁾󠁿
# "Byte order marks", U+FEFF and U+FFFE, each on its own line.
# The next two lines may appear to be blank or mojibake in some viewers.

# Unicode Symbols
#
# Strings which contain common unicode symbols (e.g. smart quotes)
Ω≈ç√∫˜µ≤≥÷
åß∂ƒ©˙∆˚¬…æ
œ∑´®†¥¨ˆøπ“‘
¡™£¢∞§¶•ªº–≠
¸˛Ç◊ı˜Â¯˘¿
ÅÍÎÏ˝ÓÔÒÚÆ☃
Œ„´‰ˇÁ¨ˆØ∏”’
`⁄€‹›fifl‡°·‚—±
⅛⅜⅝⅞
ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя
٠١٢٣٤٥٦٧٨٩
# Unicode Subscript/Superscript/Accents
#
# Strings which contain unicode subscripts/superscripts; can cause rendering issues
⁰⁴⁵
₀₁₂
⁰⁴⁵₀₁₂
ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็
# Quotation Marks
#
# Strings which contain misplaced quotation marks; can cause encoding errors
'
"
''
""
'"'
"''''"'"
"'"'"''''"
<foo val=“bar” />
<foo val=“bar” />
<foo val=”bar“ />
<foo val=`bar' />
# Two-Byte Characters
#
# Strings which contain two-byte characters: can cause rendering issues or character-length issues
田中さんにあげて下さい
パーティーへ行かないか
和製漢語
部落格
사회과학원 어학연구소
찦차를 타고 온 펲시맨과 쑛다리 똠방각하
社會科學院語學研究所
울란바토르
𠜎𠜱𠝹𠱓𠱸𠲖𠳏
# Strings which contain two-byte letters: can cause issues with naïve UTF-16 capitalizers which think that 16 bits == 1 character
𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆
# Special Unicode Characters Union
#
# A super string recommended by VMware Inc. Globalization Team: can effectively cause rendering issues or character-length issues to validate product globalization readiness.
#
# 表 CJK_UNIFIED_IDEOGRAPHS (U+8868)
# ポ KATAKANA LETTER PO (U+30DD)
# あ HIRAGANA LETTER A (U+3042)
# A LATIN CAPITAL LETTER A (U+0041)
# 鷗 CJK_UNIFIED_IDEOGRAPHS (U+9DD7)
# Œ LATIN SMALL LIGATURE OE (U+0153)
# é LATIN SMALL LETTER E WITH ACUTE (U+00E9)
# FULLWIDTH LATIN CAPITAL LETTER B (U+FF22)
# 逍 CJK_UNIFIED_IDEOGRAPHS (U+900D)
# Ü LATIN SMALL LETTER U WITH DIAERESIS (U+00FC)
# ß LATIN SMALL LETTER SHARP S (U+00DF)
# ª FEMININE ORDINAL INDICATOR (U+00AA)
# ą LATIN SMALL LETTER A WITH OGONEK (U+0105)
# ñ LATIN SMALL LETTER N WITH TILDE (U+00F1)
# 丂 CJK_UNIFIED_IDEOGRAPHS (U+4E02)
# 㐀 CJK Ideograph Extension A, First (U+3400)
# 𠀀 CJK Ideograph Extension B, First (U+20000)
表ポあA鷗Œé逍Üߪąñ丂㐀𠀀
# Changing length when lowercased
#
# Characters which increase in length (2 to 3 bytes) when lowercased
# Credit: https://twitter.com/jifa/status/625776454479970304
Ⱥ
Ⱦ
# Japanese Emoticons
#
# Strings which consists of Japanese-style emoticons which are popular on the web
ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ
(。◕ ∀ ◕。)
`ィ(´∀`∩
__ロ(,_,*)
・( ̄∀ ̄)・:*:
゚・✿ヾ╲(。◕‿◕。)╱✿・゚
,。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’
(╯°□°)╯︵ ┻━┻)
(ノಥ益ಥ)ノ ┻━┻
┬─┬ノ( º _ ºノ)
( ͡° ͜ʖ ͡°)
¯\_(ツ)_/¯
# Emoji
#
# Strings which contain Emoji; should be the same behavior as two-byte characters, but not always
😍
👩🏽
👨‍🦰 👨🏿‍🦰 👨‍🦱 👨🏿‍🦱 🦹🏿‍♂️
👾 🙇 💁 🙅 🙆 🙋 🙎 🙍
🐵 🙈 🙉 🙊
❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿
👨‍👩‍👦 👨‍👩‍👧‍👦 👨‍👨‍👦 👩‍👩‍👧 👨‍👦 👨‍👧‍👦 👩‍👦 👩‍👧‍👦
🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧
0⃣ 1⃣ 2⃣ 3⃣ 4⃣ 5⃣ 6⃣ 7⃣ 8⃣ 9⃣ 🔟
# Regional Indicator Symbols
#
# Regional Indicator Symbols can be displayed differently across
# fonts, and have a number of special behaviors
🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸
🇺🇸🇷🇺🇸🇦🇫🇦🇲
🇺🇸🇷🇺🇸🇦
# Unicode Numbers
#
# Strings which contain unicode numbers; if the code is localized, it should see the input as numeric
١٢٣
# Right-To-Left Strings
#
# Strings which contain text that should be rendered RTL if possible (e.g. Arabic, Hebrew)
ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو.
בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ
הָיְתָהtestالصفحات التّحول
مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،
الكل في المجمو عة (5)
# Ogham Text
#
# The only unicode alphabet to use a space which isn't empty but should still act like a space.
᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜
᚛                 ᚜
# Trick Unicode
#
# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf)
test
test
test
testtest
test
# Zalgo Text
#
# Strings which contain "corrupted" text. The corruption will not appear in non-HTML text, however. (via http://www.eeemo.net)
Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣
̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰
̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟
̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕
Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮
# Unicode Upsidedown
#
# Strings which contain unicode with an "upsidedown" effect (via http://www.upsidedowntext.com)
˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥
00˙Ɩ$-
# Unicode font
#
# Strings which contain bold/italic/etc. versions of normal characters
𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠
𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌
𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈
𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰
𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘
𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐
⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢
# Script Injection
#
# Strings which attempt to invoke a benign script injection; shows vulnerability to XSS
<script>alert(0)</script>
&lt;script&gt;alert(&#39;1&#39;);&lt;/script&gt;
<img src=x onerror=alert(2) />
<svg><script>123<1>alert(3)</script>
"><script>alert(4)</script>
'><script>alert(5)</script>
><script>alert(6)</script>
</script><script>alert(7)</script>
< / script >< script >alert(8)< / script >
 onfocus=JaVaSCript:alert(9) autofocus
" onfocus=JaVaSCript:alert(10) autofocus
' onfocus=JaVaSCript:alert(11) autofocus
scriptalert(12)/script
<sc<script>ript>alert(13)</sc</script>ript>
--><script>alert(14)</script>
";alert(15);t="
';alert(16);t='
JavaSCript:alert(17)
;alert(18);
src=JaVaSCript:prompt(19)
"><script>alert(20);</script x="
'><script>alert(21);</script x='
><script>alert(22);</script x=
" autofocus onkeyup="javascript:alert(23)
' autofocus onkeyup='javascript:alert(24)
<script\x20type="text/javascript">javascript:alert(25);</script>
<script\x3Etype="text/javascript">javascript:alert(26);</script>
<script\x0Dtype="text/javascript">javascript:alert(27);</script>
<script\x09type="text/javascript">javascript:alert(28);</script>
<script\x0Ctype="text/javascript">javascript:alert(29);</script>
<script\x2Ftype="text/javascript">javascript:alert(30);</script>
<script\x0Atype="text/javascript">javascript:alert(31);</script>
'`"><\x3Cscript>javascript:alert(32)</script>
'`"><\x00script>javascript:alert(33)</script>
ABC<div style="x\x3Aexpression(javascript:alert(34)">DEF
ABC<div style="x:expression\x5C(javascript:alert(35)">DEF
ABC<div style="x:expression\x00(javascript:alert(36)">DEF
ABC<div style="x:exp\x00ression(javascript:alert(37)">DEF
ABC<div style="x:exp\x5Cression(javascript:alert(38)">DEF
ABC<div style="x:\x0Aexpression(javascript:alert(39)">DEF
ABC<div style="x:\x09expression(javascript:alert(40)">DEF
ABC<div style="x:\xE3\x80\x80expression(javascript:alert(41)">DEF
ABC<div style="x:\xE2\x80\x84expression(javascript:alert(42)">DEF
ABC<div style="x:\xC2\xA0expression(javascript:alert(43)">DEF
ABC<div style="x:\xE2\x80\x80expression(javascript:alert(44)">DEF
ABC<div style="x:\xE2\x80\x8Aexpression(javascript:alert(45)">DEF
ABC<div style="x:\x0Dexpression(javascript:alert(46)">DEF
ABC<div style="x:\x0Cexpression(javascript:alert(47)">DEF
ABC<div style="x:\xE2\x80\x87expression(javascript:alert(48)">DEF
ABC<div style="x:\xEF\xBB\xBFexpression(javascript:alert(49)">DEF
ABC<div style="x:\x20expression(javascript:alert(50)">DEF
ABC<div style="x:\xE2\x80\x88expression(javascript:alert(51)">DEF
ABC<div style="x:\x00expression(javascript:alert(52)">DEF
ABC<div style="x:\xE2\x80\x8Bexpression(javascript:alert(53)">DEF
ABC<div style="x:\xE2\x80\x86expression(javascript:alert(54)">DEF
ABC<div style="x:\xE2\x80\x85expression(javascript:alert(55)">DEF
ABC<div style="x:\xE2\x80\x82expression(javascript:alert(56)">DEF
ABC<div style="x:\x0Bexpression(javascript:alert(57)">DEF
ABC<div style="x:\xE2\x80\x81expression(javascript:alert(58)">DEF
ABC<div style="x:\xE2\x80\x83expression(javascript:alert(59)">DEF
ABC<div style="x:\xE2\x80\x89expression(javascript:alert(60)">DEF
<a href="\x0Bjavascript:javascript:alert(61)" id="fuzzelement1">test</a>
<a href="\x0Fjavascript:javascript:alert(62)" id="fuzzelement1">test</a>
<a href="\xC2\xA0javascript:javascript:alert(63)" id="fuzzelement1">test</a>
<a href="\x05javascript:javascript:alert(64)" id="fuzzelement1">test</a>
<a href="\xE1\xA0\x8Ejavascript:javascript:alert(65)" id="fuzzelement1">test</a>
<a href="\x18javascript:javascript:alert(66)" id="fuzzelement1">test</a>
<a href="\x11javascript:javascript:alert(67)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x88javascript:javascript:alert(68)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x89javascript:javascript:alert(69)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x80javascript:javascript:alert(70)" id="fuzzelement1">test</a>
<a href="\x17javascript:javascript:alert(71)" id="fuzzelement1">test</a>
<a href="\x03javascript:javascript:alert(72)" id="fuzzelement1">test</a>
<a href="\x0Ejavascript:javascript:alert(73)" id="fuzzelement1">test</a>
<a href="\x1Ajavascript:javascript:alert(74)" id="fuzzelement1">test</a>
<a href="\x00javascript:javascript:alert(75)" id="fuzzelement1">test</a>
<a href="\x10javascript:javascript:alert(76)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x82javascript:javascript:alert(77)" id="fuzzelement1">test</a>
<a href="\x20javascript:javascript:alert(78)" id="fuzzelement1">test</a>
<a href="\x13javascript:javascript:alert(79)" id="fuzzelement1">test</a>
<a href="\x09javascript:javascript:alert(80)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x8Ajavascript:javascript:alert(81)" id="fuzzelement1">test</a>
<a href="\x14javascript:javascript:alert(82)" id="fuzzelement1">test</a>
<a href="\x19javascript:javascript:alert(83)" id="fuzzelement1">test</a>
<a href="\xE2\x80\xAFjavascript:javascript:alert(84)" id="fuzzelement1">test</a>
<a href="\x1Fjavascript:javascript:alert(85)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x81javascript:javascript:alert(86)" id="fuzzelement1">test</a>
<a href="\x1Djavascript:javascript:alert(87)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x87javascript:javascript:alert(88)" id="fuzzelement1">test</a>
<a href="\x07javascript:javascript:alert(89)" id="fuzzelement1">test</a>
<a href="\xE1\x9A\x80javascript:javascript:alert(90)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x83javascript:javascript:alert(91)" id="fuzzelement1">test</a>
<a href="\x04javascript:javascript:alert(92)" id="fuzzelement1">test</a>
<a href="\x01javascript:javascript:alert(93)" id="fuzzelement1">test</a>
<a href="\x08javascript:javascript:alert(94)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x84javascript:javascript:alert(95)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x86javascript:javascript:alert(96)" id="fuzzelement1">test</a>
<a href="\xE3\x80\x80javascript:javascript:alert(97)" id="fuzzelement1">test</a>
<a href="\x12javascript:javascript:alert(98)" id="fuzzelement1">test</a>
<a href="\x0Djavascript:javascript:alert(99)" id="fuzzelement1">test</a>
<a href="\x0Ajavascript:javascript:alert(100)" id="fuzzelement1">test</a>
<a href="\x0Cjavascript:javascript:alert(101)" id="fuzzelement1">test</a>
<a href="\x15javascript:javascript:alert(102)" id="fuzzelement1">test</a>
<a href="\xE2\x80\xA8javascript:javascript:alert(103)" id="fuzzelement1">test</a>
<a href="\x16javascript:javascript:alert(104)" id="fuzzelement1">test</a>
<a href="\x02javascript:javascript:alert(105)" id="fuzzelement1">test</a>
<a href="\x1Bjavascript:javascript:alert(106)" id="fuzzelement1">test</a>
<a href="\x06javascript:javascript:alert(107)" id="fuzzelement1">test</a>
<a href="\xE2\x80\xA9javascript:javascript:alert(108)" id="fuzzelement1">test</a>
<a href="\xE2\x80\x85javascript:javascript:alert(109)" id="fuzzelement1">test</a>
<a href="\x1Ejavascript:javascript:alert(110)" id="fuzzelement1">test</a>
<a href="\xE2\x81\x9Fjavascript:javascript:alert(111)" id="fuzzelement1">test</a>
<a href="\x1Cjavascript:javascript:alert(112)" id="fuzzelement1">test</a>
<a href="javascript\x00:javascript:alert(113)" id="fuzzelement1">test</a>
<a href="javascript\x3A:javascript:alert(114)" id="fuzzelement1">test</a>
<a href="javascript\x09:javascript:alert(115)" id="fuzzelement1">test</a>
<a href="javascript\x0D:javascript:alert(116)" id="fuzzelement1">test</a>
<a href="javascript\x0A:javascript:alert(117)" id="fuzzelement1">test</a>
`"'><img src=xxx:x \x0Aonerror=javascript:alert(118)>
`"'><img src=xxx:x \x22onerror=javascript:alert(119)>
`"'><img src=xxx:x \x0Bonerror=javascript:alert(120)>
`"'><img src=xxx:x \x0Donerror=javascript:alert(121)>
`"'><img src=xxx:x \x2Fonerror=javascript:alert(122)>
`"'><img src=xxx:x \x09onerror=javascript:alert(123)>
`"'><img src=xxx:x \x0Conerror=javascript:alert(124)>
`"'><img src=xxx:x \x00onerror=javascript:alert(125)>
`"'><img src=xxx:x \x27onerror=javascript:alert(126)>
`"'><img src=xxx:x \x20onerror=javascript:alert(127)>
"`'><script>\x3Bjavascript:alert(128)</script>
"`'><script>\x0Djavascript:alert(129)</script>
"`'><script>\xEF\xBB\xBFjavascript:alert(130)</script>
"`'><script>\xE2\x80\x81javascript:alert(131)</script>
"`'><script>\xE2\x80\x84javascript:alert(132)</script>
"`'><script>\xE3\x80\x80javascript:alert(133)</script>
"`'><script>\x09javascript:alert(134)</script>
"`'><script>\xE2\x80\x89javascript:alert(135)</script>
"`'><script>\xE2\x80\x85javascript:alert(136)</script>
"`'><script>\xE2\x80\x88javascript:alert(137)</script>
"`'><script>\x00javascript:alert(138)</script>
"`'><script>\xE2\x80\xA8javascript:alert(139)</script>
"`'><script>\xE2\x80\x8Ajavascript:alert(140)</script>
"`'><script>\xE1\x9A\x80javascript:alert(141)</script>
"`'><script>\x0Cjavascript:alert(142)</script>
"`'><script>\x2Bjavascript:alert(143)</script>
"`'><script>\xF0\x90\x96\x9Ajavascript:alert(144)</script>
"`'><script>-javascript:alert(145)</script>
"`'><script>\x0Ajavascript:alert(146)</script>
"`'><script>\xE2\x80\xAFjavascript:alert(147)</script>
"`'><script>\x7Ejavascript:alert(148)</script>
"`'><script>\xE2\x80\x87javascript:alert(149)</script>
"`'><script>\xE2\x81\x9Fjavascript:alert(150)</script>
"`'><script>\xE2\x80\xA9javascript:alert(151)</script>
"`'><script>\xC2\x85javascript:alert(152)</script>
"`'><script>\xEF\xBF\xAEjavascript:alert(153)</script>
"`'><script>\xE2\x80\x83javascript:alert(154)</script>
"`'><script>\xE2\x80\x8Bjavascript:alert(155)</script>
"`'><script>\xEF\xBF\xBEjavascript:alert(156)</script>
"`'><script>\xE2\x80\x80javascript:alert(157)</script>
"`'><script>\x21javascript:alert(158)</script>
"`'><script>\xE2\x80\x82javascript:alert(159)</script>
"`'><script>\xE2\x80\x86javascript:alert(160)</script>
"`'><script>\xE1\xA0\x8Ejavascript:alert(161)</script>
"`'><script>\x0Bjavascript:alert(162)</script>
"`'><script>\x20javascript:alert(163)</script>
"`'><script>\xC2\xA0javascript:alert(164)</script>
<img \x00src=x onerror="alert(165)">
<img \x47src=x onerror="javascript:alert(166)">
<img \x11src=x onerror="javascript:alert(167)">
<img \x12src=x onerror="javascript:alert(168)">
<img\x47src=x onerror="javascript:alert(169)">
<img\x10src=x onerror="javascript:alert(170)">
<img\x13src=x onerror="javascript:alert(171)">
<img\x32src=x onerror="javascript:alert(172)">
<img\x47src=x onerror="javascript:alert(173)">
<img\x11src=x onerror="javascript:alert(174)">
<img \x47src=x onerror="javascript:alert(175)">
<img \x34src=x onerror="javascript:alert(176)">
<img \x39src=x onerror="javascript:alert(177)">
<img \x00src=x onerror="javascript:alert(178)">
<img src\x09=x onerror="javascript:alert(179)">
<img src\x10=x onerror="javascript:alert(180)">
<img src\x13=x onerror="javascript:alert(181)">
<img src\x32=x onerror="javascript:alert(182)">
<img src\x12=x onerror="javascript:alert(183)">
<img src\x11=x onerror="javascript:alert(184)">
<img src\x00=x onerror="javascript:alert(185)">
<img src\x47=x onerror="javascript:alert(186)">
<img src=x\x09onerror="javascript:alert(187)">
<img src=x\x10onerror="javascript:alert(188)">
<img src=x\x11onerror="javascript:alert(189)">
<img src=x\x12onerror="javascript:alert(190)">
<img src=x\x13onerror="javascript:alert(191)">
<img[a][b][c]src[d]=x[e]onerror=[f]"alert(192)">
<img src=x onerror=\x09"javascript:alert(193)">
<img src=x onerror=\x10"javascript:alert(194)">
<img src=x onerror=\x11"javascript:alert(195)">
<img src=x onerror=\x12"javascript:alert(196)">
<img src=x onerror=\x32"javascript:alert(197)">
<img src=x onerror=\x00"javascript:alert(198)">
<a href=java&#1&#2&#3&#4&#5&#6&#7&#8&#11&#12script:javascript:alert(199)>XXX</a>
<img src="x` `<script>javascript:alert(200)</script>"` `>
<img src onerror /" '"= alt=javascript:alert(201)//">
<title onpropertychange=javascript:alert(202)></title><title title=>
<a href=http://foo.bar/#x=`y></a><img alt="`><img src=x:x onerror=javascript:alert(203)></a>">
<!--[if]><script>javascript:alert(204)</script -->
<!--[if<img src=x onerror=javascript:alert(205)//]> -->
<script src="/\%(jscript)s"></script>
<script src="\\%(jscript)s"></script>
<IMG """><SCRIPT>alert("206")</SCRIPT>">
<IMG SRC=javascript:alert(String.fromCharCode(50,48,55))>
<IMG SRC=# onmouseover="alert('208')">
<IMG SRC= onmouseover="alert('209')">
<IMG onmouseover="alert('210')">
<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#50;&#49;&#49;&#39;&#41;>
<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000050&#0000049&#0000050&#0000039&#0000041>
<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x32&#x31&#x33&#x27&#x29>
<IMG SRC="jav   ascript:alert('214');">
<IMG SRC="jav&#x09;ascript:alert('215');">
<IMG SRC="jav&#x0A;ascript:alert('216');">
<IMG SRC="jav&#x0D;ascript:alert('217');">
perl -e 'print "<IMG SRC=java\0script:alert(\"218\")>";' > out
<IMG SRC=" &#14;  javascript:alert('219');">
<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>
<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("220")>
<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>
<<SCRIPT>alert("221");//<</SCRIPT>
<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >
<SCRIPT SRC=//ha.ckers.org/.j>
<IMG SRC="javascript:alert('222')"
<iframe src=http://ha.ckers.org/scriptlet.html <
\";alert('223');//
<u oncopy=alert()> Copy me</u>
<i onwheel=alert(224)> Scroll over me </i>
<plaintext>
http://a/%%30%30
</textarea><script>alert(225)</script>
# SQL Injection
#
# Strings which can cause a SQL injection if inputs are not sanitized
1;DROP TABLE users
1'; DROP TABLE users-- 1
' OR 1=1 -- 1
' OR '1'='1
'; EXEC sp_MSForEachTable 'DROP TABLE ?'; --
%
_
# Server Code Injection
#
# Strings which can cause user to run code on server as a privileged user (c.f. https://news.ycombinator.com/item?id=7665153)
-
--
--version
--help
$USER
/dev/null; touch /tmp/blns.fail ; echo
`touch /tmp/blns.fail`
$(touch /tmp/blns.fail)
@{[system "touch /tmp/blns.fail"]}
# Command Injection (Ruby)
#
# Strings which can call system commands within Ruby/Rails applications
eval("puts 'hello world'")
System("ls -al /")
`ls -al /`
Kernel.exec("ls -al /")
Kernel.exit(1)
%x('ls -al /')
# XXE Injection (XML)
#
# String which can reveal system files when parsed by a badly configured XML parser
<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE foo [ <!ELEMENT foo ANY ><!ENTITY xxe SYSTEM "file:///etc/passwd" >]><foo>&xxe;</foo>
# Unwanted Interpolation
#
# Strings which can be accidentally expanded into different strings if evaluated in the wrong context, e.g. used as a printf format string or via Perl or shell eval. Might expose sensitive data from the program doing the interpolation, or might just represent the wrong string.
$HOME
$ENV{'HOME'}
%d
%s%s%s%s%s
{0}
%*.*s
%@
%n
File:///
# File Inclusion
#
# Strings which can cause user to pull in files that should not be a part of a web server
../../../../../../../../../../../etc/passwd%00
../../../../../../../../../../../etc/hosts
# Known CVEs and Vulnerabilities
#
# Strings that test for known vulnerabilities
() { 0; }; touch /tmp/blns.shellshock1.fail;
() { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; }
<<< %s(un='%s') = %u
+++ATH0
# MSDOS/Windows Special Filenames
#
# Strings which are reserved characters in MSDOS/Windows
CON
PRN
AUX
CLOCK$
NUL
A:
ZZ:
COM1
LPT1
LPT2
LPT3
COM2
COM3
COM4
# IRC specific strings
#
# Strings that may occur on IRC clients that make security products freak out
DCC SEND STARTKEYLOGGER 0 0 0
# Scunthorpe Problem
#
# Innocuous strings which may be blocked by profanity filters (https://en.wikipedia.org/wiki/Scunthorpe_problem)
Scunthorpe General Hospital
Penistone Community Church
Lightwater Country Park
Jimmy Clitheroe
Horniman Museum
shitake mushrooms
RomansInSussex.co.uk
http://www.cum.qc.ca/
Craig Cockburn, Software Specialist
Linda Callahan
Dr. Herman I. Libshitz
magna cum laude
Super Bowl XXX
medieval erection of parapets
evaluate
mocha
expression
Arsenal canal
classic
Tyson Gay
Dick Van Dyke
basement
# Human injection
#
# Strings which may cause human to reinterpret worldview
If you're reading this, you've been in a coma for almost 20 years now. We're trying a new technique. We don't know where this message will end up in your dream, but we hope it works. Please wake up, we miss you.
# Terminal escape codes
#
# Strings which punish the fools who use cat/type on this file
Roses are red, violets are blue. Hope you enjoy terminal hue
But now...for my greatest trick...
The quick brown fox... [Beeeep]
# iOS Vulnerabilities
#
# Strings which crashed iMessage in various versions of iOS
Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗
🏳0🌈
జ్ఞ‌ా
# Persian special characters
#
# This is a four characters string which includes Persian special characters (گچپژ)
گچپژ
# jinja2 injection
#
# first one is supposed to raise "MemoryError" exception
# second, obviously, prints contents of /etc/passwd
{% print 'x' * 64 * 1024**3 %}
{{ "".__class__.__mro__[2].__subclasses__()[40]("/etc/passwd").read() }}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,76 +0,0 @@
mod example_document;
use std::{fs, path::Path};
use example_document::ExampleDocument;
use reconcile::{reconcile, reconcile_with_cursors};
use serde::Deserialize;
/// Runs `reconcile(parent, left, right)` for every example document and checks
/// the result with `assert_eq_without_cursors`.
#[test]
fn test_document_one_way_without_cursors() {
    let documents = get_all_documents();
    documents.iter().for_each(|doc| {
        doc.assert_eq_without_cursors(&reconcile(
            &doc.parent(),
            &doc.left().text,
            &doc.right().text,
        ));
    });
}
/// Runs `reconcile_with_cursors(parent, left, right)` for every example
/// document and checks the result with `assert_eq`.
#[test]
fn test_document_one_way_with_cursors() {
    let documents = get_all_documents();
    documents.iter().for_each(|doc| {
        doc.assert_eq(&reconcile_with_cursors(
            &doc.parent(),
            doc.left(),
            doc.right(),
        ));
    });
}
/// Same as the one-way test, but with `right` and `left` swapped when calling
/// `reconcile`.
#[test]
fn test_document_inverse_way_without_cursors() {
    let documents = get_all_documents();
    documents.iter().for_each(|doc| {
        doc.assert_eq_without_cursors(&reconcile(
            &doc.parent(),
            &doc.right().text,
            &doc.left().text,
        ));
    });
}
/// Same as the one-way cursor test, but with `right` and `left` swapped when
/// calling `reconcile_with_cursors`.
#[test]
fn test_document_inverse_way_with_cursors() {
    let documents = get_all_documents();
    documents.iter().for_each(|doc| {
        doc.assert_eq(&reconcile_with_cursors(
            &doc.parent(),
            doc.right(),
            doc.left(),
        ));
    });
}
/// Loads every YAML document from each `*.yml` file directly inside
/// `tests/examples`.
///
/// A single file may hold several YAML documents; each one becomes its own
/// `ExampleDocument`. Any I/O or deserialization failure panics, which is the
/// desired outcome inside a test run.
fn get_all_documents() -> Vec<ExampleDocument> {
    let mut documents = Vec::new();

    let entries =
        fs::read_dir(Path::new("tests/examples")).expect("Failed to read examples directory");
    for entry in entries {
        let path = entry.expect("Failed to read directory entry").path();

        let is_example_file =
            path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("yml");
        if !is_example_file {
            continue;
        }

        let file = fs::File::open(&path).expect("Failed to open example file");
        documents.extend(
            serde_yaml::Deserializer::from_reader(file).map(|document| {
                ExampleDocument::deserialize(document).expect("Failed to deserialize document")
            }),
        );
    }

    documents
}

View file

@ -1,32 +0,0 @@
[package]
name = "sync_lib"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
base64 = "0.22.1"
reconcile = { path = "../reconcile" }
wasm-bindgen = "0.2.99"
thiserror = { workspace = true }
# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.7", optional = true }
[dev-dependencies]
wasm-bindgen-test = "0.3.49"
insta = "1.42.2"
[features]
default = ["console_error_panic_hook"]
[lints]
workspace = true

View file

@ -1,23 +0,0 @@
{
"name": "sync_lib",
"type": "module",
"collaborators": [
"Andras Schmelczer <andras@schmelczer.dev>"
],
"version": "0.4.0",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/schmelczer/vault-link"
},
"files": [
"sync_lib_bg.wasm",
"sync_lib.js",
"sync_lib.d.ts"
],
"main": "sync_lib.js",
"types": "sync_lib.d.ts",
"sideEffects": [
"./snippets/*"
]
}

View file

@ -1,88 +0,0 @@
use wasm_bindgen::prelude::*;
/// Wrapper type to expose `TextWithCursors` to JS.
///
/// Owns both the text and the cursor list; the `From` impls in this module
/// convert it to and from `reconcile::TextWithCursors`.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct TextWithCursors {
// The document text.
text: String,
// Cursor positions that belong to `text`.
cursors: Vec<CursorPosition>,
}
#[wasm_bindgen]
impl TextWithCursors {
    /// Creates a wrapper from a text and the cursors placed in it.
    #[wasm_bindgen(constructor)]
    #[must_use]
    pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
        Self { text, cursors }
    }

    /// Returns a copy of the text.
    #[must_use]
    pub fn text(&self) -> String {
        self.text.as_str().to_owned()
    }

    /// Returns a copy of the cursor list.
    #[must_use]
    pub fn cursors(&self) -> Vec<CursorPosition> {
        self.cursors.to_vec()
    }
}
impl From<TextWithCursors> for reconcile::TextWithCursors<'_> {
fn from(owned: TextWithCursors) -> Self {
reconcile::TextWithCursors::new_owned(
owned.text.to_string(),
owned
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
)
}
}
impl From<reconcile::TextWithCursors<'_>> for TextWithCursors {
fn from(text_with_cursors: reconcile::TextWithCursors<'_>) -> Self {
TextWithCursors {
text: text_with_cursors.text.into_owned(),
cursors: text_with_cursors
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
}
}
}
/// Wrapper type to expose `CursorPosition` to JS.
///
/// Mirrors the `id` and `char_index` fields of `reconcile::CursorPosition`.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct CursorPosition {
// Identifier of the cursor.
id: usize,
// Index of the cursor within the text (exposed to JS as `characterPosition`).
char_index: usize,
}
#[wasm_bindgen]
impl CursorPosition {
    /// Creates a cursor with the given identifier and index.
    #[wasm_bindgen(constructor)]
    #[must_use]
    pub fn new(id: usize, char_index: usize) -> Self {
        Self { id, char_index }
    }

    /// Identifier of the cursor.
    #[must_use]
    pub fn id(&self) -> usize {
        self.id
    }

    /// Index of the cursor; named `characterPosition` on the JS side.
    #[wasm_bindgen(js_name = characterPosition)]
    #[must_use]
    pub fn char_index(&self) -> usize {
        self.char_index
    }
}
impl From<CursorPosition> for reconcile::CursorPosition {
fn from(owned: CursorPosition) -> Self {
reconcile::CursorPosition {
id: owned.id,
char_index: owned.char_index,
}
}
}
/// Field-by-field conversion from the `reconcile` cursor type.
impl From<reconcile::CursorPosition> for CursorPosition {
    fn from(cursor: reconcile::CursorPosition) -> Self {
        let reconcile::CursorPosition { id, char_index } = cursor;
        Self { id, char_index }
    }
}

View file

@ -1,29 +0,0 @@
use base64::DecodeError;
use thiserror::Error;
use wasm_bindgen::JsValue;
/// Errors returned by this crate's public functions.
#[derive(Error, Debug)]
pub enum SyncLibError {
// Carries the underlying error rendered as text. The `From` impls in this
// module map both base64 decode errors and UTF-8 conversion errors here.
#[error("Base64 decoding error because of {}", .reason)]
Base64DecodingError { reason: String },
}
impl From<DecodeError> for SyncLibError {
fn from(e: DecodeError) -> Self {
SyncLibError::Base64DecodingError {
reason: e.to_string(),
}
}
}
impl From<std::string::FromUtf8Error> for SyncLibError {
fn from(e: std::string::FromUtf8Error) -> Self {
SyncLibError::Base64DecodingError {
reason: e.to_string(),
}
}
}
/// Hands the error to JS as a string `JsValue` holding its display message.
impl From<SyncLibError> for JsValue {
    fn from(val: SyncLibError) -> Self {
        let message = val.to_string();
        JsValue::from_str(&message)
    }
}

View file

@ -1,152 +0,0 @@
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `cursor`: Contains the JS-facing wrappers around the `reconcile` cursor types.
//! - `errors`: Contains error types used in this crate.
use core::str;
use base64::{Engine as _, engine::general_purpose::STANDARD};
use cursor::TextWithCursors;
use errors::SyncLibError;
use wasm_bindgen::prelude::*;
pub mod cursor;
pub mod errors;
/// Encode binary data for easy transport over HTTP. Inverse of
/// `base64_to_bytes`.
///
/// # Arguments
///
/// - `input`: The binary data to encode. Any byte sequence is accepted; it
///   does not have to be valid UTF-8.
///
/// # Returns
///
/// The base64-encoded string, produced with the `STANDARD` engine.
#[wasm_bindgen(js_name = bytesToBase64)]
#[must_use]
pub fn bytes_to_base64(input: &[u8]) -> String {
set_panic_hook();
STANDARD.encode(input)
}
/// Decode a base64 string back into raw bytes. Inverse of `bytes_to_base64`.
///
/// # Arguments
///
/// - `input`: The base64-encoded string.
///
/// # Returns
///
/// The decoded binary data.
///
/// # Errors
///
/// Returns `SyncLibError` when `input` is not valid base64.
#[wasm_bindgen(js_name = base64ToBytes)]
pub fn base64_to_bytes(input: &str) -> Result<Vec<u8>, SyncLibError> {
    set_panic_hook();
    let decoded = STANDARD.decode(input)?;
    Ok(decoded)
}
/// Merge two documents with a common parent. Relies on `reconcile::reconcile`
/// for texts and returns the right document as-is if any of the three inputs
/// (parent, left or right) is binary according to `is_binary`.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document, or a copy of `right` when a binary input was found.
///
/// # Panics
///
/// Not expected in practice: the `expect` calls below are only reached after
/// `is_binary` has returned `false` for every input, and `is_binary` reports
/// invalid UTF-8 as binary.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
set_panic_hook();
if is_binary(parent) || is_binary(left) || is_binary(right) {
// Binary content cannot be text-merged, so the right side wins.
right.to_vec()
} else {
reconcile::reconcile(
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary"),
str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary"),
)
.into_bytes()
}
}
/// WASM wrapper around `reconcile::reconcile` for merging text.
///
/// Exposed to JS as `mergeText`. Takes the common `parent` and the two edited
/// versions and returns whatever `reconcile::reconcile` produces for them.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
reconcile::reconcile(parent, left, right)
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
///
/// Exposed to JS as `mergeTextWithCursors`. Both sides are converted into the
/// `reconcile` representation, merged against `parent`, and the result is
/// converted back into the JS-facing wrapper.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
    parent: &str,
    left: TextWithCursors,
    right: TextWithCursors,
) -> TextWithCursors {
    set_panic_hook();
    let merged = reconcile::reconcile_with_cursors(parent, left.into(), right.into());
    merged.into()
}
/// Heuristically decide whether `data` is the content of a binary file.
///
/// Data counts as binary when it contains a NUL byte or is not valid UTF-8.
/// Empty input is treated as text.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
    set_panic_hook();

    // NUL is valid UTF-8, but it is highly suspicious in human-readable text,
    // so it is checked before the UTF-8 validation.
    let has_nul_byte = data.contains(&0);
    has_nul_byte || std::str::from_utf8(data).is_err()
}
/// Report whether a file may be text-merged, judged by its extension.
///
/// We don't want to support merging structured data like JSON, YAML, etc., so
/// only `md` and `txt` (compared case-insensitively) are accepted.
///
/// The extension is whatever follows the last `.` in `path_or_file_name`. A
/// name without any `.` has no extension and is therefore not mergable; in
/// particular a file literally called `md` or `txt` is rejected. A name such
/// as `.md` is still accepted.
#[wasm_bindgen(js_name = isFileTypeMergable)]
#[must_use]
pub fn is_file_type_mergable(path_or_file_name: &str) -> bool {
    set_panic_hook();

    path_or_file_name
        .rsplit_once('.')
        .is_some_and(|(_, file_extension)| {
            matches!(file_extension.to_lowercase().as_str(), "md" | "txt")
        })
}
/// Installs the `console_error_panic_hook` panic hook (via `set_once`) when
/// the `console_error_panic_hook` feature is enabled; otherwise does nothing.
///
/// Called at the start of every exported function in this file.
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -1,10 +0,0 @@
---
source: sync_lib/tests/web.rs
expression: base64_to_bytes(input)
snapshot_kind: text
---
Err(
Base64DecodingError {
reason: "Invalid symbol 61, offset 0.",
},
)

View file

@ -1,99 +0,0 @@
use insta::assert_debug_snapshot;
use sync_lib::{
cursor::{CursorPosition, TextWithCursors},
*,
};
use wasm_bindgen_test::*;
/// Encoding `hello` yields the padded base64 form.
#[wasm_bindgen_test(unsupported = test)]
fn test_bytes_to_base64() {
    let encoded = bytes_to_base64(b"hello");
    assert_eq!(encoded, "aGVsbG8=");
}
/// Decoding the padded base64 form yields `hello` again.
#[wasm_bindgen_test(unsupported = test)]
fn test_base64_to_bytes() {
    let decoded = base64_to_bytes("aGVsbG8=").unwrap();
    assert_eq!(decoded, b"hello".to_vec());
}
// Invalid base64 input must produce an error; its debug output is pinned by
// an insta snapshot.
#[test] // insta doesn't support wasm-bindgen-test
fn test_base64_to_bytes_error() {
let input = "===";
assert_debug_snapshot!(base64_to_bytes(input));
}
/// `merge` reconciles text inputs and falls back to `right` for binary ones.
#[wasm_bindgen_test(unsupported = test)]
fn test_merge() {
    // Both sides are text, so they are merged.
    let merged = merge(b"", b"hello ", b"world");
    assert_eq!(merged, b"hello world");

    // The NUL byte makes `left` binary, so `right` is returned unchanged.
    let right = b"other";
    let merged = merge(b"", b"\0binary", right);
    assert_eq!(merged, right);
}
/// Two insertions into an empty parent are concatenated.
#[wasm_bindgen_test(unsupported = test)]
fn test_merge_text() {
    let merged = merge_text("", "hello ", "world");
    assert_eq!(merged, "hello world");
}
/// The left side's text edit and the right side's cursors both survive the
/// merge.
#[wasm_bindgen_test(unsupported = test)]
fn test_merge_text_with_cursors() {
    let left = TextWithCursors::new("hi world".to_owned(), vec![]);
    let right = TextWithCursors::new(
        "hi".to_owned(),
        vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)],
    );

    let result = merge_text_with_cursors("hi", left, right);

    let expected = TextWithCursors::new(
        "hi world".to_owned(),
        vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)],
    );
    assert_eq!(result, expected);
}
/// Binary inputs are not merged: the right side is returned as-is.
#[wasm_bindgen_test(unsupported = test)]
fn merge_binary() {
    let left: [u8; 3] = [0, 1, 2];
    let right: [u8; 3] = [3, 4, 5];
    let merged = merge(b"", &left, &right);
    assert_eq!(merged, right);
}
/// Inputs containing a NUL byte are binary; plain ASCII is not.
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary() {
    let binary_samples: [&[u8]; 2] = [&[0, 159, 146, 150], &[0, 12]];
    for sample in binary_samples {
        assert!(is_binary(sample));
    }
    assert!(!is_binary(b"hello"));
}
// Empty input is treated as text, not binary.
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary_empty() {
assert!(!is_binary(b""));
}
/// Markdown files are mergable regardless of case or directory; JSON and YAML
/// files are not.
#[wasm_bindgen_test(unsupported = test)]
fn test_is_file_type_mergable() {
    let mergable = [
        ".md",
        "hi.md",
        "my/path/to/my/document.md",
        "hi.MD",
        "my/path/to/my/DOCUMENT.MD",
    ];
    for path in mergable {
        assert!(is_file_type_mergable(path));
    }

    let not_mergable = [".json", "HELLO.JSON", "my/config.yml"];
    for path in not_mergable {
        assert!(!is_file_type_mergable(path));
    }
}

View file

@ -1,40 +0,0 @@
[package]
name = "sync_server"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[dependencies]
sync_lib = { path = "../sync_lib" }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { version = "1.44.2", features = ["full"]}
uuid = { version = "1.16.0", features = ["v4", "serde"] }
log = { version = "0.4.27" }
anyhow = { version = "1.0.98", features = ["backtrace"] }
axum = { version = "0.7.4", features = ["ws", "macros", "tracing", "multipart"]}
axum-extra = { version = "0.9.6", features = ["typed-header"] }
axum_typed_multipart = "0.11.0"
tower-http = { version = "0.6.1", features = ["cors", "trace", "limit", "timeout"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["fmt", "env-filter"]}
sqlx = { version = "0.8.6", features = ["sqlite", "runtime-tokio", "uuid", "chrono"] }
chrono = { version = "0.4.41", features = ["serde"] }
rand = "0.9.0"
sanitize-filename = "0.6.0"
regex = "1.11.1"
clap = { version = "4.5.38", features = ["derive"] }
futures = "0.3.31"
serde_yaml = "0.9.34"
serde_json = "1.0.140"
clap-verbosity-flag = "3.0.3"
bimap = "0.6.3"
ts-rs = { version = "10.1", features = ["uuid-impl", "chrono-impl"] }
serde_with = "3.12.0"
[lints]
workspace = true

View file

@ -1,9 +0,0 @@
# Sync server
## Creating/resetting the Database for development
```sh
sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
cargo sqlx prepare --workspace
```

View file

@ -1,5 +0,0 @@
// generated by `sqlx migrate build-script`, then pointed at this crate's
// actual migrations directory
fn main() {
    // Trigger recompilation when a new migration is added. The path is
    // relative to the package root and must match the directory given to
    // `sqlx::migrate!`, which is `src/app_state/database/migrations`.
    println!("cargo:rerun-if-changed=src/app_state/database/migrations");
}

View file

@ -1,41 +0,0 @@
pub mod cursors;
pub mod database;
pub mod websocket;
use std::ffi::OsString;
use anyhow::Result;
use cursors::Cursors;
use database::Database;
use websocket::broadcasts::Broadcasts;
use crate::{config::Config, consts::DEFAULT_CONFIG_PATH};
/// Shared application state, built once by `AppState::try_new`.
#[derive(Clone, Debug)]
pub struct AppState {
// Configuration read (or created) from the config file.
pub config: Config,
// Per-vault SQLite access.
pub database: Database,
// In-memory cursor positions of connected clients.
pub cursors: Cursors,
// Channel used to push messages to clients.
pub broadcasts: Broadcasts,
}
impl AppState {
    /// Builds the application state.
    ///
    /// Reads (or creates) the configuration at `config_path`, falling back to
    /// `DEFAULT_CONFIG_PATH` when none is given, then constructs the
    /// broadcasts, database and cursor store and starts the cursor store's
    /// background task.
    ///
    /// # Errors
    ///
    /// Fails when the configuration cannot be read or created, or when the
    /// database cannot be initialised.
    pub async fn try_new(config_path: Option<OsString>) -> Result<Self> {
        let path = std::path::PathBuf::from(
            config_path.unwrap_or_else(|| OsString::from(DEFAULT_CONFIG_PATH)),
        );
        let config = Config::read_or_create(&path).await?;

        let broadcasts = Broadcasts::new(&config.server);
        let database = Database::try_new(&config.database, &broadcasts).await?;

        let cursors = Cursors::new(&config.database, &broadcasts);
        cursors.clone().start_background_task();

        Ok(Self {
            config,
            database,
            cursors,
            broadcasts,
        })
    }
}

View file

@ -1,128 +0,0 @@
use core::time::Duration;
use std::{collections::HashMap, sync::Arc};
use tokio::sync::Mutex;
use super::{
database::models::{DeviceId, VaultId},
websocket::{
broadcasts::Broadcasts,
models::{
ClientCursors, CursorPositionFromServer, CursorSpan, WebSocketServerMessage,
WebSocketServerMessageWithOrigin,
},
},
};
use crate::config::database_config::DatabaseConfig;
/// In-memory store of client cursor positions, grouped by vault.
///
/// Cloning is cheap with respect to the cursor data: the map lives behind an
/// `Arc`, so all clones share it.
#[derive(Clone, Debug)]
pub struct Cursors {
// Supplies `cursor_timeout`, the time after which an entry expires.
config: DatabaseConfig,
// Used to push cursor updates to clients.
broadcasts: Broadcasts,
// One entry per device within each vault, together with its last update time.
vault_to_cursors: Arc<Mutex<HashMap<VaultId, Vec<ClientCursorsWithTimeToLive>>>>,
}
impl Cursors {
/// Creates an empty cursor store that keeps its own copies of `config` and
/// `broadcasts`.
pub fn new(config: &DatabaseConfig, broadcasts: &Broadcasts) -> Self {
    let vault_to_cursors = Arc::new(Mutex::new(HashMap::new()));
    Self {
        config: config.clone(),
        broadcasts: broadcasts.clone(),
        vault_to_cursors,
    }
}
/// Replaces the cursors stored for `device_id` in `vault_id` and then
/// broadcasts the current cursors.
///
/// Any previous entry of the same device is removed first, so each device has
/// at most one entry per vault. The new entry's time-to-live starts now.
pub async fn update_cursors(
    &self,
    vault_id: VaultId,
    user_name: String,
    device_id: &DeviceId,
    document_to_cursors: HashMap<String, Vec<CursorSpan>>,
) {
    {
        let mut guard = self.vault_to_cursors.lock().await;
        let device_entries = guard.entry(vault_id).or_default();
        device_entries.retain(|entry| entry.client_cursors.device_id != *device_id);
        device_entries.push(ClientCursorsWithTimeToLive::new(ClientCursors {
            user_name,
            device_id: device_id.clone(),
            cursors: document_to_cursors,
        }));
        // The guard is released at the end of this scope: `broadcast_cursors`
        // takes the same lock, so holding it here would deadlock.
    }

    self.broadcast_cursors().await;
}
/// Returns a copy of all client cursors currently stored for `vault_id`, or
/// an empty list when the vault has no entry.
pub async fn get_cursors(&self, vault_id: &VaultId) -> Vec<ClientCursors> {
    let guard = self.vault_to_cursors.lock().await;
    match guard.get(vault_id) {
        Some(entries) => entries
            .iter()
            .map(|entry| entry.client_cursors.clone())
            .collect(),
        None => Vec::new(),
    }
}
/// Spawns a detached tokio task that removes expired cursors, sleeps for one
/// second, and repeats forever.
pub fn start_background_task(self) {
    let cleanup_loop = async move {
        loop {
            self.remove_expired_cursors().await;
            tokio::time::sleep(Duration::from_secs(1)).await;
        }
    };
    tokio::spawn(cleanup_loop);
}
/// Drops every stored entry whose age exceeds `config.cursor_timeout`.
///
/// Vault keys are kept even when their list becomes empty.
async fn remove_expired_cursors(&self) {
    let mut guard = self.vault_to_cursors.lock().await;
    for entries in guard.values_mut() {
        entries.retain(|entry| !entry.is_expired(self.config.cursor_timeout));
    }
}
/// Sends each vault its current list of client cursors as a
/// `CursorPositions` message.
///
/// Every vault in the map receives a message, not only the one that changed,
/// and the map stays locked until all sends have completed.
async fn broadcast_cursors(&self) {
    let guard = self.vault_to_cursors.lock().await;
    for (vault_id, entries) in guard.iter() {
        let clients = entries
            .iter()
            .map(|entry| entry.client_cursors.clone())
            .collect();
        let message = WebSocketServerMessageWithOrigin::new(
            WebSocketServerMessage::CursorPositions(CursorPositionFromServer { clients }),
        );
        self.broadcasts
            .send_document_update(vault_id.clone(), message)
            .await;
    }
}
/// Removes the entries of `device_id` from `vault_id`, if the vault is known.
///
/// No broadcast is sent from here.
pub async fn remove_cursors_of_device(&self, vault_id: &str, device_id: &str) {
    let mut guard = self.vault_to_cursors.lock().await;
    let Some(entries) = guard.get_mut(vault_id) else {
        return;
    };
    entries.retain(|entry| entry.client_cursors.device_id != device_id);
}
}
/// A client's cursors together with the instant they were last stored, used
/// to expire stale entries.
#[derive(Clone, Debug)]
struct ClientCursorsWithTimeToLive {
// The cursors as sent to clients.
client_cursors: ClientCursors,
// Set to the construction time in `new`.
last_updated: std::time::Instant,
}
impl ClientCursorsWithTimeToLive {
    /// Wraps `client_cursors` and stamps it with the current instant.
    fn new(client_cursors: ClientCursors) -> Self {
        let last_updated = std::time::Instant::now();
        Self {
            client_cursors,
            last_updated,
        }
    }

    /// Returns `true` once strictly more than `ttl` has passed since the
    /// entry was created.
    pub fn is_expired(&self, ttl: Duration) -> bool {
        let age = self.last_updated.elapsed();
        age > ttl
    }
}

View file

@ -1,425 +0,0 @@
use core::time::Duration;
use std::{collections::HashMap, sync::Arc};
use anyhow::{Context as _, Result};
use models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId, VaultUpdateId,
};
use sqlx::{sqlite::SqliteConnectOptions, types::chrono::Utc};
pub mod models;
use sqlx::{Pool, Sqlite, sqlite::SqlitePoolOptions};
use tokio::sync::Mutex;
use uuid::fmt::Hyphenated;
use super::websocket::{
broadcasts::Broadcasts,
models::{WebSocketServerMessage, WebSocketServerMessageWithOrigin, WebSocketVaultUpdate},
};
use crate::config::database_config::DatabaseConfig;
/// Access to the per-vault SQLite databases.
///
/// Each vault has its own database file and connection pool; pools are shared
/// between clones through the `Arc`.
#[derive(Clone, Debug)]
pub struct Database {
// Directory of the database files and pool sizing.
config: DatabaseConfig,
// Used to announce inserted document versions to clients.
broadcasts: Broadcasts,
// Pools that are already open, keyed by vault.
connection_pools: Arc<Mutex<HashMap<VaultId, Pool<Sqlite>>>>,
}
pub type Transaction<'a> = sqlx::Transaction<'a, Sqlite>;
impl Database {
pub async fn try_new(config: &DatabaseConfig, broadcasts: &Broadcasts) -> Result<Self> {
tokio::fs::create_dir_all(&config.databases_directory_path)
.await
.with_context(|| {
format!(
"Failed to create databases directory: {}",
config.databases_directory_path.to_string_lossy()
)
})?;
let mut connection_pools = std::collections::HashMap::new();
let mut entries = tokio::fs::read_dir(&config.databases_directory_path).await?;
while let Some(entry) = entries.next_entry().await? {
if !entry.file_name().to_string_lossy().ends_with(".sqlite") {
continue;
}
let vault: VaultId = entry
.file_name()
.to_string_lossy()
.trim_end_matches(".sqlite")
.to_owned();
connection_pools.insert(
vault.clone(),
Self::create_vault_database(config, &vault).await?,
);
}
Ok(Self {
config: config.clone(),
connection_pools: Arc::new(Mutex::new(connection_pools)),
broadcasts: broadcasts.clone(),
})
}
/// Opens the connection pool for `vault` and runs pending migrations on it.
///
/// The database file is `<databases_directory_path>/<vault>.sqlite`; it is
/// created if missing and opened in WAL journal mode with a one hour busy
/// timeout.
async fn create_vault_database(
    config: &DatabaseConfig,
    vault: &VaultId,
) -> Result<Pool<Sqlite>> {
    let database_path = config
        .databases_directory_path
        .join(format!("{vault}.sqlite"));

    let connect_options = SqliteConnectOptions::new()
        .filename(&database_path)
        .create_if_missing(true)
        .busy_timeout(Duration::from_secs(3600))
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal);

    let pool_options = SqlitePoolOptions::new()
        .max_connections(config.max_connections_per_vault)
        .test_before_acquire(true);

    let pool = pool_options
        .connect_with(connect_options)
        .await
        .with_context(|| format!("Cannot open database at {}", database_path.display()))?;

    Self::run_migrations(&pool).await?;
    Ok(pool)
}
/// Applies the migrations embedded from `src/app_state/database/migrations`
/// to `pool`.
async fn run_migrations(pool: &Pool<Sqlite>) -> Result<()> {
    let migrator = sqlx::migrate!("src/app_state/database/migrations");
    migrator
        .run(pool)
        .await
        .context("Cannot check for pending migrations")
}
/// Returns the connection pool of `vault`, opening (and caching) it on first
/// use.
///
/// The pool map stays locked while a missing database is being created.
async fn get_connection_pool(&self, vault: &VaultId) -> Result<Pool<Sqlite>> {
    let mut pools = self.connection_pools.lock().await;

    if let Some(existing) = pools.get(vault) {
        return Ok(existing.clone());
    }

    let created = Self::create_vault_database(&self.config, vault).await?;
    pools.insert(vault.clone(), created.clone());
    Ok(created)
}
/// Begins a transaction on the vault's pool, intended for read-only work.
///
/// Attempting to write from this transaction might result in a database
/// locked error; use `create_write_transaction` for writes.
pub async fn create_readonly_transaction(
    &self,
    vault: &VaultId,
) -> Result<Transaction<'static>> {
    let pool = self.get_connection_pool(vault).await?;
    pool.begin().await.context("Cannot create transaction")
}
/// Begins a transaction suitable for writes.
///
/// Starts from a regular transaction, then ends it and immediately begins an
/// `IMMEDIATE` one on the same connection.
pub async fn create_write_transaction(&self, vault: &VaultId) -> Result<Transaction<'static>> {
let mut transaction = self.create_readonly_transaction(vault).await?;
// sqlx doesn't support immediate transactions for sqlite: https://github.com/launchbadge/sqlx/issues/481
sqlx::query!("END; BEGIN IMMEDIATE;")
.execute(&mut *transaction)
.await?;
Ok(transaction)
}
/// Return the latest state of all documents in the vault, without their
/// content, ordered by `vault_update_id`.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault` (the only case in which `vault` is used).
pub async fn get_latest_documents(
&self,
vault: &VaultId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
let query = sqlx::query!(
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
is_deleted,
user_id,
device_id,
length(content) as "content_size: u64"
from latest_document_versions
order by vault_update_id
"#,
);
if let Some(transaction) = transaction {
query.fetch_all(&mut **transaction).await
} else {
query
.fetch_all(&self.get_connection_pool(vault).await?)
.await
}
.context("Cannot fetch latest documents")
.map(|rows| {
rows.into_iter()
.map(|row| DocumentVersionWithoutContent {
vault_update_id: row.vault_update_id,
document_id: row.document_id.into(),
relative_path: row.relative_path,
updated_date: row.updated_date,
is_deleted: row.is_deleted,
user_id: row.user_id,
device_id: row.device_id,
content_size: row
.content_size
.expect("Content size can't be null but sqlx can't infer it"),
})
.collect()
})
}
/// Return the latest state of all documents (including deleted) in the
/// vault which have changed since the given update id, i.e. whose latest
/// `vault_update_id` is strictly greater than `vault_update_id`. Content is
/// not included; rows are ordered by `vault_update_id`.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault`.
pub async fn get_latest_documents_since(
&self,
vault: &VaultId,
vault_update_id: VaultUpdateId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
let query = sqlx::query!(
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
is_deleted,
user_id,
device_id,
length(content) as "content_size: u64"
from latest_document_versions
where vault_update_id > ?
order by vault_update_id
"#,
vault_update_id
);
if let Some(transaction) = transaction {
query.fetch_all(&mut **transaction).await
} else {
query
.fetch_all(&self.get_connection_pool(vault).await?)
.await
}
.with_context(|| {
format!("Cannot fetch latest documents since vault_update_id {vault_update_id}")
})
.map(|rows| {
rows.into_iter()
.map(|row| DocumentVersionWithoutContent {
vault_update_id: row.vault_update_id,
document_id: row.document_id.into(),
relative_path: row.relative_path,
updated_date: row.updated_date,
is_deleted: row.is_deleted,
user_id: row.user_id,
device_id: row.device_id,
content_size: row
.content_size
.expect("Content size can't be null but sqlx can't infer it"),
})
.collect()
})
}
/// Return the highest `vault_update_id` stored in the vault's `documents`
/// table, or `0` when the table is empty.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault`.
pub async fn get_max_update_id_in_vault(
&self,
vault: &VaultId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<i64> {
let query = sqlx::query!(
r#"
select coalesce(max(vault_update_id), 0) as max_vault_update_id
from documents
"#,
);
if let Some(transaction) = transaction {
query.fetch_one(&mut **transaction).await
} else {
query
.fetch_one(&self.get_connection_pool(vault).await?)
.await
}
.map(|row| row.max_vault_update_id)
.context("Cannot fetch max update id in vault")
}
/// Return the most recent latest-version row whose path is `relative_path`,
/// including its content, or `None` when no document has that path.
///
/// Several documents can share a path (see the comment inside the query), so
/// the one with the highest `vault_update_id` is chosen.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault`.
pub async fn get_latest_document_by_path(
&self,
vault: &VaultId,
relative_path: &str,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Option<StoredDocumentVersion>> {
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id
from latest_document_versions
where relative_path = ?
order by vault_update_id desc -- `latest_document_versions` only contains a single latest version of each document, however,
-- multiple documents can have the same `relative_path`, if they have been deleted. That's
-- why we only care about the latest version of the document with the given relative path.
limit 1
"#,
relative_path
);
if let Some(transaction) = transaction {
query.fetch_optional(&mut **transaction).await
} else {
query
.fetch_optional(&self.get_connection_pool(vault).await?)
.await
}
.context("Cannot fetch latest document version")
}
/// Return the latest version of the document with the given id, including
/// its content, or `None` when the id is unknown.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault`.
pub async fn get_latest_document(
&self,
vault: &VaultId,
document_id: &DocumentId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Option<StoredDocumentVersion>> {
// The id is bound in its hyphenated text form.
let document_id = document_id.as_hyphenated();
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id
from latest_document_versions
where document_id = ?
"#,
document_id
);
if let Some(transaction) = transaction {
query.fetch_optional(&mut **transaction).await
} else {
query
.fetch_optional(&self.get_connection_pool(vault).await?)
.await
}
.context("Cannot fetch latest document version")
}
/// Return the stored version with exactly this `vault_update_id`, including
/// its content, or `None` when there is no such version. Unlike the
/// `get_latest_*` queries this reads the `documents` table directly, so
/// superseded versions can be fetched too.
///
/// The query runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault`.
pub async fn get_document_version(
&self,
vault: &VaultId,
vault_update_id: VaultUpdateId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Option<StoredDocumentVersion>> {
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id
from documents
where vault_update_id = ?"#,
vault_update_id
);
if let Some(transaction) = transaction {
query.fetch_optional(&mut **transaction).await
} else {
query
.fetch_optional(&self.get_connection_pool(vault).await?)
.await
}
.context("Cannot fetch document version")
}
/// Insert `version` into the vault's `documents` table and announce it to
/// clients as a non-initial `VaultUpdate` originating from the version's
/// device.
///
/// The insert runs on `transaction` when one is given; otherwise it runs on
/// the connection pool of `vault_id`.
///
/// # Errors
///
/// Fails when the insert fails; nothing is broadcast in that case.
pub async fn insert_document_version(
&self,
vault_id: &VaultId,
version: &StoredDocumentVersion,
transaction: Option<&mut Transaction<'_>>,
) -> Result<()> {
let document_id = version.document_id.as_hyphenated();
let query = sqlx::query!(
r#"
insert into documents (
vault_update_id,
document_id,
relative_path,
updated_date,
content,
is_deleted,
user_id,
device_id
)
values (?, ?, ?, ?, ?, ?, ?, ?)
"#,
version.vault_update_id,
document_id,
version.relative_path,
version.updated_date,
version.content,
version.is_deleted,
version.user_id,
version.device_id
);
if let Some(transaction) = transaction {
query.execute(&mut **transaction).await
} else {
query
.execute(&self.get_connection_pool(vault_id).await?)
.await
}
.context("Cannot insert document version")?;
// NOTE(review): the broadcast is sent as soon as the insert has executed,
// even when a caller-supplied transaction is still open — confirm callers
// always commit afterwards.
self.broadcasts
.send_document_update(
vault_id.clone(),
WebSocketServerMessageWithOrigin::with_origin(
version.device_id.clone(),
WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate {
documents: vec![version.clone().into()],
is_initial_sync: false,
}),
),
)
.await;
Ok(())
}
}

View file

@ -1,21 +0,0 @@
CREATE TABLE IF NOT EXISTS documents (
vault_update_id INTEGER NOT NULL PRIMARY KEY,
document_id TEXT NOT NULL,
relative_path TEXT NOT NULL,
updated_date TIMESTAMP NOT NULL,
content BLOB NOT NULL,
is_deleted BOOLEAN NOT NULL
);
CREATE VIEW IF NOT EXISTS latest_document_versions AS
SELECT d.*
FROM documents d
INNER JOIN (
SELECT MAX(vault_update_id) AS max_version_id
FROM documents
GROUP BY document_id
) max_versions
ON d.vault_update_id = max_versions.max_version_id;
CREATE INDEX IF NOT EXISTS idx_documents_vault_id_relative_path
ON documents (relative_path);

View file

@ -1,2 +0,0 @@
-- Adds the author columns; both default to an empty string.
-- NOTE(review): the defaults are written with double quotes, which SQL reserves
-- for identifiers; '' would be the standard spelling of an empty string literal.
ALTER TABLE documents ADD COLUMN user_id TEXT NOT NULL DEFAULT "";
ALTER TABLE documents ADD COLUMN device_id TEXT NOT NULL DEFAULT "";

View file

@ -1,89 +0,0 @@
use chrono::{DateTime, Utc};
use serde::Serialize;
use sync_lib::bytes_to_base64;
use ts_rs::TS;
/// Identifier of a vault.
pub type VaultId = String;
/// Identifier of a single stored update; it is the primary key of a document version.
pub type VaultUpdateId = i64;
/// Identifier of a document, shared by all of its versions.
pub type DocumentId = uuid::Uuid;
/// Identifier of the user who produced a version.
pub type UserId = String;
/// Identifier of the device that produced a version.
pub type DeviceId = String;
/// One version of a document, including its full content.
#[derive(Debug, Clone)]
pub struct StoredDocumentVersion {
/// Identifier of the update that produced this version.
pub vault_update_id: VaultUpdateId,
/// Document this version belongs to.
pub document_id: DocumentId,
/// Path of the document relative to the vault.
pub relative_path: String,
/// Time at which this version was recorded.
pub updated_date: DateTime<Utc>,
/// Raw content bytes of this version.
pub content: Vec<u8>,
/// Whether this version marks the document as deleted.
pub is_deleted: bool,
/// User who produced this version.
pub user_id: UserId,
/// Device that produced this version.
pub device_id: DeviceId,
}
/// Two stored versions are equal when they carry the same `vault_update_id`;
/// no other field takes part in the comparison.
impl PartialEq for StoredDocumentVersion {
    fn eq(&self, rhs: &Self) -> bool {
        let (lhs_id, rhs_id) = (self.vault_update_id, rhs.vault_update_id);
        lhs_id == rhs_id
    }
}
// Metadata of a document version; carries the content's size instead of the
// content itself. Serialized with camelCase field names.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DocumentVersionWithoutContent {
// Declared to TypeScript as an `i32` although the Rust type is `i64`.
#[ts(as = "i32")]
pub vault_update_id: VaultUpdateId,
pub document_id: DocumentId,
pub relative_path: String,
pub updated_date: DateTime<Utc>,
pub is_deleted: bool,
pub user_id: UserId,
pub device_id: DeviceId,
// Length of the content in bytes; declared to TypeScript as an `i32`
// although the Rust type is `u64`.
#[ts(as = "i32")]
pub content_size: u64,
}
impl From<StoredDocumentVersion> for DocumentVersionWithoutContent {
fn from(value: StoredDocumentVersion) -> Self {
Self {
vault_update_id: value.vault_update_id,
document_id: value.document_id,
relative_path: value.relative_path,
updated_date: value.updated_date,
is_deleted: value.is_deleted,
user_id: value.user_id,
device_id: value.device_id,
content_size: value.content.len() as u64,
}
}
}
// A document version whose content is carried as a base64 string.
// Serialized with camelCase field names.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DocumentVersion {
// Declared to TypeScript as an `i32` although the Rust type is `i64`.
#[ts(as = "i32")]
pub vault_update_id: VaultUpdateId,
pub document_id: DocumentId,
pub relative_path: String,
pub updated_date: DateTime<Utc>,
// Base64 encoding of the version's content bytes.
pub content_base64: String,
pub is_deleted: bool,
pub user_id: UserId,
pub device_id: DeviceId,
}
impl From<StoredDocumentVersion> for DocumentVersion {
fn from(value: StoredDocumentVersion) -> Self {
Self {
vault_update_id: value.vault_update_id,
document_id: value.document_id,
relative_path: value.relative_path,
updated_date: value.updated_date,
content_base64: bytes_to_base64(&value.content),
is_deleted: value.is_deleted,
user_id: value.user_id,
device_id: value.device_id,
}
}
}

View file

@ -1,3 +0,0 @@
pub mod broadcasts;
pub mod models;
pub mod utils;

View file

@ -1,63 +0,0 @@
use std::{collections::HashMap, sync::Arc};
use anyhow::Context;
use tokio::sync::{Mutex, broadcast};
use super::models::WebSocketServerMessageWithOrigin;
use crate::{
app_state::database::models::VaultId, config::server_config::ServerConfig, errors::server_error,
};
/// Registry of per-vault broadcast channels used to push server messages to
/// websocket listeners. Clones share the same registry through the `Arc`.
#[derive(Debug, Clone)]
pub struct Broadcasts {
/// Used as the capacity of every broadcast channel that gets created.
max_clients_per_vault: usize,
/// Sender half of each vault's channel, created on first use.
tx: Arc<Mutex<HashMap<VaultId, broadcast::Sender<WebSocketServerMessageWithOrigin>>>>,
}
impl Broadcasts {
    /// Creates an empty registry. The `max_clients_per_vault` setting of
    /// `server_config` becomes the capacity of every channel created later.
    pub fn new(server_config: &ServerConfig) -> Self {
        Self {
            max_clients_per_vault: server_config.max_clients_per_vault,
            tx: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Returns a new receiver subscribed to the channel of `vault`, creating
    /// the channel if it does not exist yet.
    pub async fn get_receiver(
        &self,
        vault: VaultId,
    ) -> broadcast::Receiver<WebSocketServerMessageWithOrigin> {
        self.get_or_create(vault).await.subscribe()
    }

    /// Notify all clients (who are subscribed to the vault) about an update.
    /// We only log failures.
    pub async fn send_document_update(
        &self,
        vault: VaultId,
        document: WebSocketServerMessageWithOrigin,
    ) {
        let tx = self.get_or_create(vault).await;
        let result = tx
            .send(document)
            .context("Cannot broadcast server message to websocket listeners")
            .map_err(server_error);
        if result.is_err() {
            log::debug!("Failed to send message: {result:?}");
        }
    }

    /// Returns a clone of the sender for `vault`, creating the channel on
    /// first use. The registry lock is only held for the duration of the
    /// lookup.
    async fn get_or_create(
        &self,
        vault: VaultId,
    ) -> broadcast::Sender<WebSocketServerMessageWithOrigin> {
        let mut senders = self.tx.lock().await;
        senders
            .entry(vault)
            .or_insert_with(|| {
                // `broadcast::channel` panics when asked for a capacity of 0, so a
                // zero in the configuration is clamped to the smallest valid value.
                let capacity = self.max_clients_per_vault.max(1);
                broadcast::channel(capacity).0
            })
            .clone()
    }
}

View file

@ -1,88 +0,0 @@
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use ts_rs::TS;
use crate::app_state::database::models::{DeviceId, DocumentVersionWithoutContent, VaultUpdateId};
// Handshake payload sent by a client; deserialized from camelCase JSON.
#[derive(TS, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct WebSocketHandshake {
// Token the client authenticates with.
pub token: String,
pub device_id: DeviceId,
// Last update the client reports having seen, if any.
// Declared to TypeScript as `Option<i32>` although the Rust type is `i64`.
#[ts(as = "Option<i32>")]
pub last_seen_vault_update_id: Option<VaultUpdateId>,
}
// A cursor or selection expressed as a start/end pair.
// NOTE(review): the unit of `start`/`end` (bytes, characters, ...) is not
// visible here — confirm against the client.
#[derive(TS, Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CursorSpan {
pub start: usize,
pub end: usize,
}
// Cursor positions reported by a client, grouped under string keys.
// NOTE(review): presumably the keys identify documents; verify against the client.
#[derive(TS, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CursorPositionFromClient {
pub document_to_cursors: HashMap<String, Vec<CursorSpan>>,
}
// Cursor positions of one client, labelled with its user name and device.
#[derive(TS, Serialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ClientCursors {
pub user_name: String,
pub device_id: DeviceId,
pub cursors: HashMap<String, Vec<CursorSpan>>,
}
// Cursor positions of a set of clients, as sent by the server.
#[derive(TS, Serialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct CursorPositionFromServer {
pub clients: Vec<ClientCursors>,
}
// Document versions (without their content) pushed to websocket clients.
#[derive(TS, Serialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct WebSocketVaultUpdate {
pub documents: Vec<DocumentVersionWithoutContent>,
// Flags whether this update is the initial sync sent to a client.
pub is_initial_sync: bool,
}
// Messages a client can send; the variant is selected by the camelCase
// `type` field of the JSON object.
#[derive(TS, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase", tag = "type")]
#[ts(export)]
pub enum WebSocketClientMessage {
Handshake(WebSocketHandshake),
CursorPositions(CursorPositionFromClient),
}
// Messages the server can send; the variant is written to the camelCase
// `type` field of the JSON object.
#[derive(TS, Serialize, Clone, Debug)]
#[serde(rename_all = "camelCase", tag = "type")]
#[ts(export)]
pub enum WebSocketServerMessage {
VaultUpdate(WebSocketVaultUpdate),
CursorPositions(CursorPositionFromServer),
}
/// A server message paired with the device it originated from, if any.
#[derive(Clone, Debug)]
pub struct WebSocketServerMessageWithOrigin {
/// Device that caused the message; `None` when no origin is recorded.
pub origin_device_id: Option<DeviceId>,
/// The message itself.
pub message: WebSocketServerMessage,
}
impl WebSocketServerMessageWithOrigin {
    /// Wraps `message` without recording an originating device.
    pub fn new(message: WebSocketServerMessage) -> Self {
        let origin_device_id = None;
        Self {
            message,
            origin_device_id,
        }
    }

    /// Wraps `message` and records `origin_device_id` as its origin.
    pub fn with_origin(origin_device_id: DeviceId, message: WebSocketServerMessage) -> Self {
        let origin_device_id = Some(origin_device_id);
        Self {
            message,
            origin_device_id,
        }
    }
}

View file

@ -1,80 +0,0 @@
use anyhow::Context;
use axum::extract::ws::{Message, WebSocket};
use futures::{sink::SinkExt, stream::SplitSink};
use super::models::{WebSocketClientMessage, WebSocketHandshake, WebSocketServerMessage};
use crate::{
app_state::{
AppState,
database::models::{DocumentVersionWithoutContent, VaultId, VaultUpdateId},
},
config::user_config::User,
errors::{SyncServerError, server_error, unauthenticated_error},
server::auth::auth,
};
/// A handshake together with the user its token resolved to.
pub struct AuthenticatedWebSocketHandshake {
/// The handshake as sent by the client.
pub handshake: WebSocketHandshake,
/// The user returned by `auth` for the handshake's token.
pub user: User,
}
/// Interprets `message` as the handshake of a websocket connection and
/// authenticates it against `vault_id`.
///
/// # Errors
/// - unauthenticated error when `message` is missing or is not a text frame,
///   or when the parsed message is not a handshake;
/// - server error when the text cannot be parsed as a client message;
/// - whatever `auth` returns when the token is rejected.
pub fn get_authenticated_handshake(
    state: &AppState,
    vault_id: &VaultId,
    message: Option<Message>,
) -> Result<AuthenticatedWebSocketHandshake, SyncServerError> {
    let Some(Message::Text(text)) = message else {
        return Err(unauthenticated_error(anyhow::anyhow!(
            "Failed to authenticate due to invalid message"
        )));
    };

    let parsed: WebSocketClientMessage = serde_json::from_str(&text)
        .context("Failed to parse message")
        .map_err(server_error)?;

    let handshake = match parsed {
        WebSocketClientMessage::Handshake(handshake) => handshake,
        WebSocketClientMessage::CursorPositions(_) => {
            return Err(unauthenticated_error(anyhow::anyhow!(
                "Expected a handshake message"
            )));
        }
    };

    // The token is trimmed before it is checked.
    let user = auth(state, handshake.token.trim(), vault_id)?;
    Ok(AuthenticatedWebSocketHandshake { handshake, user })
}
/// Fetches the document versions a client still has to learn about.
///
/// With a `last_seen_vault_update_id` the lookup is delegated to
/// `get_latest_documents_since`; without one, to `get_latest_documents`.
/// Database failures are reported as server errors.
pub async fn get_unseen_documents(
    state: &AppState,
    vault_id: &VaultId,
    last_seen_vault_update_id: Option<VaultUpdateId>,
) -> Result<Vec<DocumentVersionWithoutContent>, SyncServerError> {
    let database = &state.database;
    let documents = match last_seen_vault_update_id {
        Some(update_id) => {
            database
                .get_latest_documents_since(vault_id, update_id, None)
                .await
        }
        None => database.get_latest_documents(vault_id, None).await,
    };
    documents.map_err(server_error)
}
/// Serializes `update` to JSON and sends it as a text frame through `sender`.
///
/// # Errors
/// Returns a server error if serialization or sending fails.
pub async fn send_update_over_websocket(
    update: &WebSocketServerMessage,
    sender: &mut SplitSink<WebSocket, Message>,
) -> Result<(), SyncServerError> {
    let payload = serde_json::to_string(update)
        .context("Failed to serialize update")
        .map_err(server_error)?;

    let outcome = sender.send(Message::Text(payload)).await;
    outcome
        .context("Failed to send message over websocket")
        .map_err(server_error)
}

View file

@ -1,2 +0,0 @@
pub mod args;
pub mod color_when;

View file

@ -1,26 +0,0 @@
use std::ffi::OsString;
use clap::Parser;
use clap_verbosity_flag::{InfoLevel, Verbosity};
use crate::cli::color_when::ColorWhen;
/// Server for backing the `VaultLink` plugin
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
pub struct Args {
// Optional first positional argument: path of the configuration file.
#[arg(index = 1)]
pub config_path: Option<OsString>,
// Verbosity flags; the level is `Info` when none are given.
#[command(flatten)]
pub verbose: Verbosity<InfoLevel>,
// `--color <WHEN>`; defaults to `auto` when the flag is absent.
// NOTE(review): `default_missing_value` only matters if `--color` may be
// passed without a value; no `num_args` is set here — confirm that works.
#[arg(
long,
value_name = "WHEN",
default_value_t = ColorWhen::Auto,
default_missing_value = "always",
value_enum
)]
pub color: ColorWhen,
}

View file

@ -1,31 +0,0 @@
use std::io::IsTerminal;
use clap::ValueEnum;
// When coloured output should be used.
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)]
pub enum ColorWhen {
// Always use colours.
Always,
// Use colours unless `NO_COLOR` is set or stderr is not a terminal.
Auto,
// Never use colours.
Never,
}
impl ColorWhen {
pub fn use_colors(self) -> bool {
match self {
ColorWhen::Always => true,
ColorWhen::Auto => {
std::env::var_os("NO_COLOR").is_none() && std::io::stderr().is_terminal()
}
ColorWhen::Never => false,
}
}
}
/// Displays the variant under the name clap assigns to it as a possible value.
impl std::fmt::Display for ColorWhen {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let possible_value = self
            .to_possible_value()
            .expect("no values are skipped");
        std::fmt::Display::fmt(possible_value.get_name(), f)
    }
}

View file

@ -1,66 +0,0 @@
use std::path::Path;
use anyhow::{Context as _, Result};
use database_config::DatabaseConfig;
use log::info;
use serde::{Deserialize, Serialize};
use server_config::ServerConfig;
use tokio::fs;
use user_config::UserConfig;
pub mod database_config;
pub mod server_config;
pub mod user_config;
/// Root of the configuration file. Every section is optional and falls back
/// to the `Default` of its type when missing.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct Config {
/// Database settings.
#[serde(default)]
pub database: DatabaseConfig,
/// HTTP server settings.
#[serde(default)]
pub server: ServerConfig,
/// Users and their vault access.
#[serde(default)]
pub users: UserConfig,
}
impl Config {
    /// Loads the configuration from `path` if it exists, or starts from the
    /// defaults otherwise, and then writes the resulting configuration back
    /// to `path`.
    ///
    /// # Errors
    /// Returns an error if the file cannot be read, parsed or written.
    pub async fn read_or_create(path: &Path) -> Result<Self> {
        let config = if path.exists() {
            info!(
                "Loading configuration from '{}'",
                Self::path_for_logging(path).display()
            );
            Self::load_from_file(path).await?
        } else {
            Self::default()
        };
        config.write(path).await?;
        info!(
            "Updated configuration at '{}'",
            Self::path_for_logging(path).display()
        );
        Ok(config)
    }

    /// Reads and parses the YAML configuration stored at `path`.
    ///
    /// # Errors
    /// Returns an error if the file cannot be read or is not a valid
    /// configuration.
    pub async fn load_from_file(path: &Path) -> Result<Self> {
        let contents = fs::read_to_string(path).await.with_context(|| {
            format!(
                "Cannot load configuration from disk from {}",
                path.display()
            )
        })?;
        let config = serde_yaml::from_str(&contents).context("Failed to parse configuration")?;
        Ok(config)
    }

    /// Serializes the configuration as YAML and writes it to `path`.
    ///
    /// # Errors
    /// Returns an error if serialization or the write fails.
    pub async fn write(&self, path: &Path) -> Result<()> {
        let contents = serde_yaml::to_string(&self).context("Failed to serialize configuration")?;
        fs::write(path, contents)
            .await
            .context("Failed to write configuration to disk")
    }

    /// Canonical form of `path` for log messages. Falls back to the path as
    /// given when it cannot be canonicalized, so that logging never panics.
    fn path_for_logging(path: &Path) -> std::path::PathBuf {
        path.canonicalize()
            .unwrap_or_else(|_| path.to_path_buf())
    }
}

View file

@ -1,48 +0,0 @@
use std::{path::PathBuf, time::Duration};
use log::debug;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use crate::consts::{
DEFAULT_CURSOR_TIMEOUT, DEFAULT_DATABASES_DIRECTORY_PATH, DEFAULT_MAX_CONNECTIONS_PER_VAULT,
};
/// Database section of the configuration; every field has its own default.
#[serde_with::serde_as]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct DatabaseConfig {
/// Directory holding the databases.
#[serde(default = "default_databases_directory_path")]
pub databases_directory_path: PathBuf,
/// Maximum number of connections per vault.
#[serde(default = "default_max_connections_per_vault")]
pub max_connections_per_vault: u32,
/// Cursor timeout; stored in the file as whole seconds under the key
/// `cursor_timeout_seconds`.
#[serde(default = "default_cursor_timeout", rename = "cursor_timeout_seconds")]
#[serde_as(as = "serde_with::DurationSeconds<u64>")]
pub cursor_timeout: Duration,
}
/// Default for `databases_directory_path`; logs at debug level that the
/// default is being used.
fn default_databases_directory_path() -> PathBuf {
debug!("Using default databases directory path: {DEFAULT_DATABASES_DIRECTORY_PATH:?}");
PathBuf::from(DEFAULT_DATABASES_DIRECTORY_PATH)
}
/// Default for `max_connections_per_vault`; logs at debug level that the
/// default is being used.
fn default_max_connections_per_vault() -> u32 {
debug!("Using default max connections: {DEFAULT_MAX_CONNECTIONS_PER_VAULT}");
DEFAULT_MAX_CONNECTIONS_PER_VAULT
}
/// Default for `cursor_timeout`; logs at debug level that the default is
/// being used.
fn default_cursor_timeout() -> Duration {
debug!("Using default cursor timeout: {DEFAULT_CURSOR_TIMEOUT:?}");
DEFAULT_CURSOR_TIMEOUT
}
/// Builds the configuration from the same per-field defaults that serde uses
/// for missing keys, so both paths agree.
impl Default for DatabaseConfig {
fn default() -> Self {
Self {
databases_directory_path: default_databases_directory_path(),
max_connections_per_vault: default_max_connections_per_vault(),
cursor_timeout: default_cursor_timeout(),
}
}
}

View file

@ -1,50 +0,0 @@
use log::debug;
use serde::{Deserialize, Serialize};
use crate::consts::{
DEFAULT_HOST, DEFAULT_MAX_BODY_SIZE_MB, DEFAULT_MAX_CLIENTS_PER_VAULT, DEFAULT_PORT,
DEFAULT_RESPONSE_TIMEOUT_SECONDS,
};
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct ServerConfig {
#[serde(default = "default_host")]
pub host: String,
#[serde(default = "default_port")]
pub port: u16,
#[serde(default = "default_max_body_size_mb")]
pub max_body_size_mb: usize,
#[serde(default = "default_max_clients_per_vault")]
pub max_clients_per_vault: usize,
#[serde(default = "default_response_timeout_seconds")]
pub response_timeout_seconds: u64,
}
/// Default for `host`; logs at debug level that the default is being used.
fn default_host() -> String {
debug!("Using default server host: {DEFAULT_HOST}");
DEFAULT_HOST.to_owned()
}
/// Default for `port`; logs at debug level that the default is being used.
fn default_port() -> u16 {
debug!("Using default server port: {DEFAULT_PORT}");
DEFAULT_PORT
}
/// Default for `max_body_size_mb`; logs at debug level that the default is
/// being used.
fn default_max_body_size_mb() -> usize {
debug!("Using default max body size (MB): {DEFAULT_MAX_BODY_SIZE_MB}");
DEFAULT_MAX_BODY_SIZE_MB
}
/// Default for `max_clients_per_vault`; logs at debug level that the default
/// is being used.
fn default_max_clients_per_vault() -> usize {
debug!("Using default max clients per vault: {DEFAULT_MAX_CLIENTS_PER_VAULT}");
DEFAULT_MAX_CLIENTS_PER_VAULT
}
/// Default for `response_timeout_seconds`; logs at debug level that the
/// default is being used.
fn default_response_timeout_seconds() -> u64 {
debug!("Using default response timeout (seconds): {DEFAULT_RESPONSE_TIMEOUT_SECONDS}");
DEFAULT_RESPONSE_TIMEOUT_SECONDS
}

View file

@ -1,164 +0,0 @@
use bimap::BiHashMap;
use rand::{Rng, distr::Alphanumeric, rng};
use serde::{Deserialize, Deserializer, Serialize, de::Error};
use crate::app_state::database::models::VaultId;
/// Users section of the configuration.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct UserConfig {
/// Configured users. Falls back to `default_users` when the key is missing
/// and is checked by `validate_users` when it is present.
#[serde(default = "default_users", deserialize_with = "validate_users")]
pub user_configs: Vec<User>,
}
fn validate_users<'de, D>(deserializer: D) -> Result<Vec<User>, D::Error>
where
D: Deserializer<'de>,
{
let users = Vec::<User>::deserialize(deserializer)?;
let mut user_token_map = BiHashMap::new();
for user in &users {
if let Some(existing_name) = user_token_map.get_by_right(&user.token) {
return Err(D::Error::custom(format!(
"Duplicate user token found: '{}' for users '{}' and '{}'. User tokens must be \
unique.",
user.token, existing_name, user.name
)));
}
if user_token_map.contains_left(&user.name) {
return Err(D::Error::custom(format!(
"Duplicate user name found: '{}'. User names must be unique.",
user.name
)));
}
user_token_map.insert(user.name.clone(), user.token.clone());
}
Ok(users)
}
impl UserConfig {
/// Returns the first configured user whose token equals `token`, if any.
///
/// NOTE(review): `==` on strings is not a constant-time comparison; if
/// timing side channels on tokens are a concern, compare in constant time.
pub fn get_user(&self, token: &str) -> Option<&User> {
self.user_configs.iter().find(|u| u.token == token)
}
}
/// A configured user.
///
/// NOTE(review): the derived `Debug` output includes `token`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct User {
/// Name of the user; must be unique among the configured users.
pub name: String,
/// Token of the user; must be unique among the configured users.
pub token: String,
/// Which vaults the user may access.
pub vault_access: VaultAccess,
}
/// Uses the same `default_users` fallback as deserialization does for a
/// missing `user_configs` key.
impl Default for UserConfig {
fn default() -> Self {
Self {
user_configs: default_users(),
}
}
}
/// Access policy of a user; the variant is selected by the snake_case `type`
/// field.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum VaultAccess {
/// The user may access every vault (the default).
#[default]
AllowAccessToAll,
/// The user may access only the listed vaults.
AllowList(AllowListedVaults),
}
/// Payload of `VaultAccess::AllowList`.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct AllowListedVaults {
/// Vaults the user is allowed to access.
pub allowed: Vec<VaultId>,
}
/// Default user list: a single `admin` user with a freshly generated random
/// token and the default vault access.
fn default_users() -> Vec<User> {
vec![User {
name: "admin".to_owned(),
token: get_random_token(),
vault_access: VaultAccess::default(),
}]
}
/// Generates a random token of 64 alphanumeric characters.
pub fn get_random_token() -> String {
rng()
.sample_iter(&Alphanumeric)
.take(64)
.map(char::from)
.collect()
}
// Tests for the uniqueness validation applied while deserializing `UserConfig`.
#[cfg(test)]
mod tests {
use serde_json::json;
use super::*;
// Distinct names and distinct tokens are accepted.
#[test]
fn test_validate_users_unique_names_and_tokens() {
let config_json = json!({
"user_configs": [
{
"name": "alice",
"token": "token1",
"vault_access": { "type": "allow_access_to_all" }
},
{
"name": "bob",
"token": "token2",
"vault_access": { "type": "allow_access_to_all" }
}
]
});
let config: Result<UserConfig, _> = serde_json::from_value(config_json);
assert!(config.is_ok());
}
// Two users with the same name are rejected with a duplicate-name error.
#[test]
fn test_validate_users_duplicate_names() {
let config_json = json!({
"user_configs": [
{
"name": "alice",
"token": "token1",
"vault_access": { "type": "allow_access_to_all" }
},
{
"name": "alice",
"token": "token2",
"vault_access": { "type": "allow_access_to_all" }
}
]
});
let config: Result<UserConfig, _> = serde_json::from_value(config_json);
assert!(config.is_err());
let err = config.unwrap_err().to_string();
assert!(err.contains("Duplicate user name found"));
}
// Two users with the same token are rejected with a duplicate-token error.
#[test]
fn test_validate_users_duplicate_tokens() {
let config_json = json!({
"user_configs": [
{
"name": "alice",
"token": "token1",
"vault_access": { "type": "allow_access_to_all" }
},
{
"name": "bob",
"token": "token1",
"vault_access": { "type": "allow_access_to_all" }
}
]
});
let config: Result<UserConfig, _> = serde_json::from_value(config_json);
assert!(config.is_err());
let err = config.unwrap_err().to_string();
assert!(err.contains("Duplicate user token found"));
}
}

View file

@ -1,13 +0,0 @@
use std::time::Duration;
/// Default path of the configuration file.
pub const DEFAULT_CONFIG_PATH: &str = "config.yml";
/// Default directory holding the databases.
pub const DEFAULT_DATABASES_DIRECTORY_PATH: &str = "databases";
/// Default maximum number of database connections per vault.
pub const DEFAULT_MAX_CONNECTIONS_PER_VAULT: u32 = 12;
/// Default cursor timeout (60 seconds).
pub const DEFAULT_CURSOR_TIMEOUT: Duration = Duration::from_secs(60);
/// Default host the server binds to.
pub const DEFAULT_HOST: &str = "127.0.0.1";
/// Default port the server binds to.
pub const DEFAULT_PORT: u16 = 3000;
/// Default maximum request body size, in megabytes.
pub const DEFAULT_MAX_BODY_SIZE_MB: usize = 4096;
/// Default response timeout, in seconds.
pub const DEFAULT_RESPONSE_TIMEOUT_SECONDS: u64 = 60;
/// Default maximum number of clients per vault.
pub const DEFAULT_MAX_CLIENTS_PER_VAULT: usize = 256;

View file

@ -1,140 +0,0 @@
use std::fmt::Display;
use axum::{
Json,
http::StatusCode,
response::{IntoResponse, Response},
};
use log::{debug, error};
use serde::Serialize;
use thiserror::Error;
use ts_rs::TS;
/// Errors produced by the server. Each variant wraps the underlying
/// `anyhow::Error` and determines the HTTP status used when the error is
/// turned into a response.
#[derive(Error, Debug)]
pub enum SyncServerError {
/// Failure during start-up; answered with 500 Internal Server Error.
#[error("Initialisation error: {0}")]
InitError(#[source] anyhow::Error),
/// The request was invalid; answered with 400 Bad Request.
#[error("Client error: {0:?}")]
ClientError(#[source] anyhow::Error),
/// Failure on the server side; answered with 500 Internal Server Error.
#[error("Server error: {0:?}")]
ServerError(#[source] anyhow::Error),
/// The requested resource does not exist; answered with 404 Not Found.
#[error("Not found: {0}")]
NotFound(#[source] anyhow::Error),
/// The caller could not be authenticated; answered with 401 Unauthorized.
#[error("Unauthorized: {0}")]
Unauthenticated(#[source] anyhow::Error),
/// The caller is not allowed to do this; answered with 403 Forbidden.
#[error("Permission denied error: {0}")]
PermissionDeniedError(#[source] anyhow::Error),
}
impl SyncServerError {
pub fn serialize(&self) -> SerializedError {
match self {
Self::InitError(error)
| Self::ClientError(error)
| Self::ServerError(error)
| Self::NotFound(error)
| Self::Unauthenticated(error)
| Self::PermissionDeniedError(error) => error.into(),
}
}
}
// Serializable description of an error; serialized with camelCase field names.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct SerializedError {
// Name of the error's kind, e.g. "NotFound" or "UnknownError".
pub error_type: &'static str,
// Message of the top-level error.
pub message: String,
// Messages of the error's sources, outermost first.
pub causes: Vec<String>,
}
/// Human-readable rendering: the message, followed by a bulleted list of
/// causes when there are any.
impl Display for SerializedError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Without the message an error that has no causes would render as an
        // empty string.
        write!(f, "{}", self.message)?;
        if !self.causes.is_empty() {
            write!(f, "\nCauses:\n")?;
            for cause in &self.causes {
                writeln!(f, "- {cause}")?;
            }
        }
        Ok(())
    }
}
/// Responds with the serialized error as a JSON body and a status code chosen
/// by the variant.
impl IntoResponse for SyncServerError {
    fn into_response(self) -> Response {
        let status = match &self {
            Self::InitError(_) | Self::ServerError(_) => StatusCode::INTERNAL_SERVER_ERROR,
            Self::ClientError(_) => StatusCode::BAD_REQUEST,
            Self::NotFound(_) => StatusCode::NOT_FOUND,
            Self::Unauthenticated(_) => StatusCode::UNAUTHORIZED,
            Self::PermissionDeniedError(_) => StatusCode::FORBIDDEN,
        };
        let body = Json(self.serialize());
        (status, body).into_response()
    }
}
impl From<&anyhow::Error> for SerializedError {
fn from(error: &anyhow::Error) -> SerializedError {
let mut causes = vec![];
let mut current_error = error.source();
while let Some(error) = current_error {
causes.push(error.to_string());
current_error = error.source();
}
SerializedError {
error_type: error.downcast_ref::<SyncServerError>().map_or(
"UnknownError",
|e| match e {
SyncServerError::InitError(_) => "InitError",
SyncServerError::ClientError(_) => "ClientError",
SyncServerError::ServerError(_) => "ServerError",
SyncServerError::NotFound(_) => "NotFound",
SyncServerError::Unauthenticated(_) => "Unauthenticated",
SyncServerError::PermissionDeniedError(_) => "PermissionDeniedError",
},
),
message: error.to_string(),
causes,
}
}
}
/// Logs `error` at debug level and wraps it in `SyncServerError::InitError`.
pub fn init_error(error: anyhow::Error) -> SyncServerError {
debug!("Initialization error: {error:?}");
SyncServerError::InitError(error)
}
/// Logs `error` at debug level and wraps it in `SyncServerError::ServerError`.
pub fn server_error(error: anyhow::Error) -> SyncServerError {
debug!("Server error: {error:?}");
SyncServerError::ServerError(error)
}
/// Logs `error` at debug level and wraps it in `SyncServerError::ClientError`.
pub fn client_error(error: anyhow::Error) -> SyncServerError {
debug!("Client error: {error:?}");
SyncServerError::ClientError(error)
}
/// Logs `error` at debug level and wraps it in `SyncServerError::NotFound`.
pub fn not_found_error(error: anyhow::Error) -> SyncServerError {
debug!("Not found: {error:?}");
SyncServerError::NotFound(error)
}
/// Logs `error` at debug level and wraps it in `SyncServerError::Unauthenticated`.
pub fn unauthenticated_error(error: anyhow::Error) -> SyncServerError {
debug!("Unauthenticated user: {error:?}");
SyncServerError::Unauthenticated(error)
}
/// Logs `error` at debug level and wraps it in
/// `SyncServerError::PermissionDeniedError`.
pub fn permission_denied_error(error: anyhow::Error) -> SyncServerError {
debug!("Permission denied: {error:?}");
SyncServerError::PermissionDeniedError(error)
}

View file

@ -1,86 +0,0 @@
mod app_state;
mod cli;
mod config;
mod consts;
mod errors;
mod server;
mod utils;
use std::process::ExitCode;
use anyhow::{Context as _, Result};
use clap::Parser;
use cli::args::Args;
use errors::{SyncServerError, init_error};
use log::info;
use server::create_server;
use tracing_subscriber::{EnvFilter, fmt::format, util::SubscriberInitExt};
/// Entry point: parses the command line, sets up logging and runs the server.
/// A failure in either step is printed to stderr and turns into a failing
/// exit code.
#[tokio::main]
async fn main() -> ExitCode {
    let args = Args::parse();

    // The server is only started once logging is in place.
    let outcome = match set_up_logging(&args) {
        Ok(()) => start_server(args).await,
        Err(error) => Err(error),
    };

    if let Err(error) = outcome {
        eprintln!("{}", error.serialize());
        return ExitCode::FAILURE;
    }
    ExitCode::SUCCESS
}
/// Installs the global tracing subscriber.
///
/// The default level comes from the verbosity flags and can be refined through
/// the environment filter. Targets and line numbers are only shown from debug
/// verbosity upwards; timestamps are never shown.
///
/// # Errors
/// Returns an init error if the environment filter cannot be built or the
/// subscriber cannot be installed.
fn set_up_logging(args: &Args) -> Result<(), SyncServerError> {
    let verbosity = args.verbose.log_level_filter();
    let default_level = match verbosity {
        // We don't want to allow disabling all logging
        log::LevelFilter::Off | log::LevelFilter::Error => tracing::Level::ERROR,
        log::LevelFilter::Warn => tracing::Level::WARN,
        log::LevelFilter::Info => tracing::Level::INFO,
        log::LevelFilter::Debug => tracing::Level::DEBUG,
        log::LevelFilter::Trace => tracing::Level::TRACE,
    };

    let env_filter = EnvFilter::builder()
        .with_default_directive(default_level.into())
        .from_env()
        .context("Failed to create logging env filter")
        .map_err(init_error)?;

    let is_debug_mode = verbosity >= log::LevelFilter::Debug;
    let event_format = format()
        .without_time()
        .with_target(is_debug_mode)
        .with_line_number(is_debug_mode)
        .compact();

    tracing_subscriber::fmt()
        .with_ansi(args.color.use_colors())
        .with_env_filter(env_filter)
        .event_format(event_format)
        .finish()
        .try_init()
        .context("Failed to initialise tracing")
        .map_err(init_error)
}
async fn start_server(args: Args) -> Result<(), SyncServerError> {
info!(
"Starting VaultLink server version {}",
env!("CARGO_PKG_VERSION")
);
create_server(args.config_path)
.await
.context("Failed to start server")
.map_err(init_error)
}

View file

@ -1,184 +0,0 @@
pub mod auth;
mod create_document;
mod delete_document;
mod device_id_header;
mod fetch_document_version;
mod fetch_document_version_content;
mod fetch_latest_document_version;
mod fetch_latest_documents;
mod index;
mod ping;
mod requests;
mod responses;
mod update_document;
mod websocket;
use std::{ffi::OsString, time::Duration};
use anyhow::{Context as _, Result, anyhow};
use auth::auth_middleware;
use axum::{
Router,
extract::{DefaultBodyLimit, Request},
http::{self, HeaderValue, Method},
middleware,
response::IntoResponse,
routing::{IntoMakeService, delete, get, post, put},
};
use device_id_header::DEVICE_ID_HEADER_NAME;
use log::info;
use tokio::signal;
use tower_http::{
LatencyUnit,
cors::CorsLayer,
limit::RequestBodyLimitLayer,
timeout::TimeoutLayer,
trace::{
DefaultOnBodyChunk, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
TraceLayer,
},
};
use tracing::{Level, info_span};
use crate::{
app_state::AppState,
config::server_config::ServerConfig,
errors::{client_error, not_found_error},
};
/// Builds the application state and the router, then serves requests until
/// shutdown.
///
/// The router consists of the authenticated document routes plus the
/// unauthenticated index, ping and websocket routes. On top of those it
/// applies a request body limit, a response timeout, CORS and request tracing.
/// Paths that match no route are answered by `handle_404`.
///
/// # Errors
/// Returns an error if the application state cannot be initialised or the
/// server fails to start.
pub async fn create_server(config_path: Option<OsString>) -> Result<()> {
    let app_state = AppState::try_new(config_path)
        .await
        .context("Failed to initialise app state")?;
    let server_config = app_state.config.server.clone();

    // Saturate rather than wrap if the configured size does not fit in `usize`.
    let max_body_size_bytes = server_config.max_body_size_mb.saturating_mul(1024 * 1024);

    let app = Router::new()
        .nest("/", get_authed_routes(app_state.clone()))
        .route("/", get(index::index))
        .route("/vaults/:vault_id/ping", get(ping::ping))
        .route("/vaults/:vault_id/ws", get(websocket::websocket_handler))
        .layer(DefaultBodyLimit::disable())
        .layer(RequestBodyLimitLayer::new(max_body_size_bytes))
        .layer(TimeoutLayer::new(Duration::from_secs(
            server_config.response_timeout_seconds,
        )))
        .layer(
            CorsLayer::new()
                .allow_origin("*".parse::<HeaderValue>().expect("Failed to parse origin"))
                .allow_headers([
                    http::header::CONTENT_TYPE,
                    http::header::AUTHORIZATION,
                    DEVICE_ID_HEADER_NAME.clone(),
                ])
                .allow_methods([Method::GET, Method::POST, Method::PUT, Method::DELETE]),
        )
        .layer(
            TraceLayer::new_for_http()
                .make_span_with(|request: &Request<_>| {
                    info_span!(
                        "http",
                        method = ?request.method(),
                        uri = ?request.uri(),
                    )
                })
                .on_request(DefaultOnRequest::new().level(Level::INFO))
                .on_response(
                    DefaultOnResponse::new()
                        .level(Level::INFO)
                        .latency_unit(LatencyUnit::Millis),
                )
                .on_body_chunk(DefaultOnBodyChunk::new())
                .on_eos(DefaultOnEos::new())
                .on_failure(DefaultOnFailure::new().level(Level::ERROR)),
        )
        .with_state(app_state)
        // A router has a single fallback and a later `.fallback(..)` call
        // replaces an earlier one, so only the 404 handler is registered here.
        .fallback(handle_404)
        .into_make_service();

    start_server(app, &server_config).await
}
/// Builds the document routes, all of which sit behind `auth_middleware`.
///
/// NOTE(review): the two `versions` routes that fetch data are registered for
/// PUT rather than GET — confirm this is what clients expect.
fn get_authed_routes(app_state: AppState) -> Router<AppState> {
    const DOCUMENTS: &str = "/vaults/:vault_id/documents";
    const DOCUMENT: &str = "/vaults/:vault_id/documents/:document_id";
    const VERSION: &str = "/vaults/:vault_id/documents/:document_id/versions/:version_id";
    const VERSION_CONTENT: &str =
        "/vaults/:vault_id/documents/:document_id/versions/:version_id/content";

    Router::new()
        .route(
            DOCUMENTS,
            get(fetch_latest_documents::fetch_latest_documents)
                .post(create_document::create_document),
        )
        .route(
            DOCUMENT,
            get(fetch_latest_document_version::fetch_latest_document_version)
                .put(update_document::update_document)
                .delete(delete_document::delete_document),
        )
        .route(VERSION, put(fetch_document_version::fetch_document_version))
        .route(
            VERSION_CONTENT,
            put(fetch_document_version_content::fetch_document_version_content),
        )
        .layer(middleware::from_fn_with_state(app_state, auth_middleware))
}
async fn start_server(app: IntoMakeService<axum::Router>, config: &ServerConfig) -> Result<()> {
let address = format!("{}:{}", config.host, config.port);
let listener = tokio::net::TcpListener::bind(address.clone())
.await
.with_context(|| format!("Failed to bind to address: {address}"))?;
info!(
"Listening on http://{}",
listener
.local_addr()
.context("Failed to get local address")?
);
axum::serve(listener, app)
.with_graceful_shutdown(shutdown_signal())
.tcp_nodelay(true)
.await
.context("Failed to start server")
}
/// Completes when the process is asked to stop: on Ctrl+C, or — on Unix — on
/// SIGTERM.
///
/// # Panics
/// Panics if a signal handler cannot be installed.
async fn shutdown_signal() {
let ctrl_c = async {
signal::ctrl_c()
.await
.expect("failed to install Ctrl+C handler");
};
#[cfg(unix)]
let terminate = async {
signal::unix::signal(signal::unix::SignalKind::terminate())
.expect("failed to install signal handler")
.recv()
.await;
};
// There is no terminate signal on other platforms, so this branch never completes.
#[cfg(not(unix))]
let terminate = std::future::pending::<()>();
// Return as soon as either signal arrives.
tokio::select! {
() = ctrl_c => {},
() = terminate => {},
}
}
/// Fallback handler answering with a "Page not found" not-found error.
async fn handle_404() -> impl IntoResponse { not_found_error(anyhow!("Page not found")) }
/// Fallback handler answering with a "Method not allowed" client error.
/// NOTE(review): a client error maps to 400 Bad Request, not 405.
async fn handle_405() -> impl IntoResponse { client_error(anyhow!("Method not allowed")) }

View file

@ -1,9 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>VaultLink</title>
</head>
<body>
<h1>VaultLink server</h1>
</body>
</html>

View file

@ -1,70 +0,0 @@
use std::collections::HashMap;
use axum::{
extract::{Path, Request, State},
middleware::Next,
response::Response,
};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use log::info;
use crate::{
app_state::{AppState, database::models::VaultId},
config::user_config::{AllowListedVaults, User, VaultAccess},
errors::{SyncServerError, permission_denied_error, unauthenticated_error},
utils::normalize::normalize_string,
};
/// Middleware that authenticates the bearer token of a request against the
/// vault named in its path.
///
/// On success the authenticated `User` is stored in the request extensions
/// and the request is passed on to the next handler.
///
/// # Errors
/// Returns an unauthenticated error when the path has no `vault_id`, and
/// whatever `auth` returns when the token is rejected.
pub async fn auth_middleware(
    State(state): State<AppState>,
    Path(path_params): Path<HashMap<String, String>>,
    TypedHeader(auth_header): TypedHeader<Authorization<Bearer>>,
    mut req: Request,
    next: Next,
) -> Result<Response, SyncServerError> {
    let Some(raw_vault_id) = path_params.get("vault_id") else {
        return Err(unauthenticated_error(anyhow::anyhow!("Missing vault_id")));
    };
    let vault_id = normalize_string(raw_vault_id);

    // The token is trimmed before it is checked.
    let user = auth(&state, auth_header.token().trim(), &vault_id)?;
    req.extensions_mut().insert(user);

    let response = next.run(req).await;
    Ok(response)
}
/// Resolves `token` to a configured user and checks that the user may access
/// `vault_id`. The outcome is logged at info level.
///
/// # Errors
/// - unauthenticated error when no user has this token;
/// - permission denied error when the user's allow list does not contain
///   `vault_id`.
pub fn auth(state: &AppState, token: &str, vault_id: &VaultId) -> Result<User, SyncServerError> {
    let Some(user) = state.config.users.get_user(token).cloned() else {
        return Err(unauthenticated_error(anyhow::anyhow!("Invalid token")));
    };

    let may_access_vault = match &user.vault_access {
        VaultAccess::AllowAccessToAll => true,
        VaultAccess::AllowList(AllowListedVaults { allowed }) => allowed.contains(vault_id),
    };

    if !may_access_vault {
        info!(
            "User '{}' is authenticated but is not authorised to access vault '{vault_id}'",
            user.name
        );
        return Err(permission_denied_error(anyhow::anyhow!(
            "Permission denied for vault `{vault_id}`"
        )));
    }

    info!(
        "User '{}' is authenticated and is authorised to access to vault '{vault_id}'",
        user.name
    );
    Ok(user)
}

View file

@ -1,95 +0,0 @@
use anyhow::Context as _;
use axum::{
Extension, Json,
extract::{Path, State},
};
use axum_extra::TypedHeader;
use axum_typed_multipart::TypedMultipart;
use serde::Deserialize;
use super::{device_id_header::DeviceIdHeader, requests::CreateDocumentVersion};
use crate::{
app_state::{
AppState,
database::models::{DocumentVersionWithoutContent, StoredDocumentVersion, VaultId},
},
config::user_config::User,
errors::{SyncServerError, client_error, server_error},
utils::{normalize::normalize, sanitize_path::sanitize_path},
};
/// Path parameters of the create-document route.
#[derive(Deserialize)]
pub struct CreateDocumentPathParams {
/// Vault the document is created in; passed through `normalize` while
/// deserializing.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
}
/// Creates a new document in the vault and returns the metadata of its first
/// version.
///
/// When the request carries a `document_id`, the call fails with a client
/// error if a version with that ID already exists; when it carries none, a
/// fresh UUID is generated. The new version gets the vault's maximum update ID
/// plus one, the sanitized relative path and the uploaded content, and is
/// attributed to the authenticated user and the device from the device-ID
/// header. Everything runs inside one write transaction.
///
/// NOTE(review): this comment used to say that an existing document with the
/// same path would be merged into a new version; nothing in this handler
/// looks at existing paths.
#[axum::debug_handler]
pub async fn create_document(
Path(CreateDocumentPathParams { vault_id }): Path<CreateDocumentPathParams>,
Extension(user): Extension<User>,
TypedHeader(device_id): TypedHeader<DeviceIdHeader>,
State(state): State<AppState>,
TypedMultipart(request): TypedMultipart<CreateDocumentVersion>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
let mut transaction = state
.database
.create_write_transaction(&vault_id)
.await
.map_err(server_error)?;
// Use the client-supplied ID only if it is not taken yet; otherwise generate one.
let document_id = match request.document_id {
Some(document_id) => {
let existing_version = state
.database
.get_latest_document(&vault_id, &document_id, Some(&mut transaction))
.await
.map_err(server_error)?;
if existing_version.is_some() {
return Err(client_error(anyhow::anyhow!(
"Document with the same ID already exists"
)));
}
document_id
}
None => uuid::Uuid::new_v4(),
};
// Read inside the same transaction so that the next update ID is derived
// from the state this insert will be applied to.
let last_update_id = state
.database
.get_max_update_id_in_vault(&vault_id, Some(&mut transaction))
.await
.map_err(server_error)?;
let sanitized_relative_path = sanitize_path(&request.relative_path);
let new_version = StoredDocumentVersion {
vault_update_id: last_update_id + 1,
document_id,
relative_path: sanitized_relative_path,
content: request.content.contents.to_vec(),
updated_date: chrono::Utc::now(),
is_deleted: false,
user_id: user.name,
device_id: device_id.0,
};
state
.database
.insert_document_version(&vault_id, &new_version, Some(&mut transaction))
.await
.map_err(server_error)?;
transaction
.commit()
.await
.context("Failed to commit successful transaction")
.map_err(server_error)?;
Ok(Json(new_version.into()))
}

View file

@ -1,84 +0,0 @@
use anyhow::Context as _;
use axum::{
Extension, Json,
extract::{Path, State},
};
use axum_extra::TypedHeader;
use serde::Deserialize;
use super::{device_id_header::DeviceIdHeader, requests::DeleteDocumentVersion};
use crate::{
app_state::{
AppState,
database::models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId,
},
},
config::user_config::User,
errors::{SyncServerError, server_error},
utils::{normalize::normalize, sanitize_path::sanitize_path},
};
/// Path parameters destructured by the `delete_document` handler.
#[derive(Deserialize)]
pub struct DeleteDocumentPathParams {
/// Vault that holds the document; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
/// Identifier of the document to mark as deleted.
document_id: DocumentId,
}
/// Mark a document as deleted by appending a new version with
/// `is_deleted: true`.
///
/// The new version reuses the content of the document's latest stored version
/// (or empty content when the document has no versions at all), takes its path
/// from the request, and gets a `vault_update_id` one greater than the vault's
/// current maximum. Everything happens inside one write transaction. The
/// response carries the new version's metadata without its content.
#[axum::debug_handler]
pub async fn delete_document(
    Path(DeleteDocumentPathParams {
        vault_id,
        document_id,
    }): Path<DeleteDocumentPathParams>,
    Extension(user): Extension<User>,
    TypedHeader(device_id): TypedHeader<DeviceIdHeader>,
    State(state): State<AppState>,
    Json(request): Json<DeleteDocumentVersion>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
    let mut transaction = state
        .database
        .create_write_transaction(&vault_id)
        .await
        .map_err(server_error)?;

    let current_max_update_id = state
        .database
        .get_max_update_id_in_vault(&vault_id, Some(&mut transaction))
        .await
        .map_err(server_error)?;

    let previous_version = state
        .database
        .get_latest_document(&vault_id, &document_id, Some(&mut transaction))
        .await
        .map_err(server_error)?;

    // Copy the content from the latest version; fall back to empty content in
    // case the document has never existed before deleting it.
    let carried_over_content = previous_version
        .map(|version| version.content)
        .unwrap_or_default();

    let deletion_marker = StoredDocumentVersion {
        vault_update_id: current_max_update_id + 1,
        document_id,
        relative_path: sanitize_path(&request.relative_path),
        content: carried_over_content,
        updated_date: chrono::Utc::now(),
        is_deleted: true,
        user_id: user.name,
        device_id: device_id.0,
    };

    state
        .database
        .insert_document_version(&vault_id, &deletion_marker, Some(&mut transaction))
        .await
        .map_err(server_error)?;

    transaction
        .commit()
        .await
        .context("Failed to commit successful transaction")
        .map_err(server_error)?;

    Ok(Json(deletion_marker.into()))
}

View file

@ -1,33 +0,0 @@
use axum_extra::headers;
use headers::{Header, HeaderName, HeaderValue};
/// Typed representation of the `device-id` request header; wraps the raw
/// header value as a `String`.
pub struct DeviceIdHeader(pub String);
/// Name of the header that `DeviceIdHeader` is decoded from and encoded to.
pub static DEVICE_ID_HEADER_NAME: HeaderName = HeaderName::from_static("device-id");
impl Header for DeviceIdHeader {
fn name() -> &'static HeaderName { &DEVICE_ID_HEADER_NAME }
fn decode<'i, I>(values: &mut I) -> Result<Self, headers::Error>
where
I: Iterator<Item = &'i HeaderValue>,
{
let value = values.next().ok_or_else(headers::Error::invalid)?;
Ok(DeviceIdHeader(
value
.to_str()
.map_err(|_| headers::Error::invalid())?
.to_owned(),
))
}
fn encode<E>(&self, values: &mut E)
where
E: Extend<HeaderValue>,
{
let value = HeaderValue::from_static(Box::leak(self.0.to_string().into_boxed_str()));
values.extend(std::iter::once(value));
}
}

View file

@ -1,57 +0,0 @@
use anyhow::anyhow;
use axum::{
Json,
extract::{Path, State},
};
use serde::Deserialize;
use crate::{
app_state::{
AppState,
database::models::{DocumentId, DocumentVersion, VaultId, VaultUpdateId},
},
errors::{SyncServerError, not_found_error, server_error},
utils::normalize::normalize,
};
/// Path parameters destructured by the `fetch_document_version` handler.
#[derive(Deserialize)]
pub struct FetchDocumentVersionPathParams {
/// Vault that holds the document; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
/// Document the requested version must belong to.
document_id: DocumentId,
/// Vault update ID identifying the requested version.
vault_update_id: VaultUpdateId,
}
/// Return one specific version of a document, content included.
///
/// The version is looked up by its vault update ID. A not-found error is
/// returned when no such version exists or when the version found belongs to
/// a document other than `document_id`.
#[axum::debug_handler]
pub async fn fetch_document_version(
    Path(FetchDocumentVersionPathParams {
        vault_id,
        document_id,
        vault_update_id,
    }): Path<FetchDocumentVersionPathParams>,
    State(state): State<AppState>,
) -> Result<Json<DocumentVersion>, SyncServerError> {
    let stored_version = state
        .database
        .get_document_version(&vault_id, vault_update_id, None)
        .await
        .map_err(server_error)?
        .ok_or_else(|| {
            not_found_error(anyhow!(
                "Document with vault update id `{vault_update_id}` not found",
            ))
        })?;

    // Update IDs are looked up per vault, so make sure the hit is a version
    // of the document named in the URL.
    if stored_version.document_id == document_id {
        Ok(Json(stored_version.into()))
    } else {
        Err(not_found_error(anyhow!(
            "Document with document id `{document_id}` does not have a version with id \
             `{vault_update_id}`",
        )))
    }
}

View file

@ -1,57 +0,0 @@
use anyhow::anyhow;
use axum::{
body::Bytes,
extract::{Path, State},
};
use serde::Deserialize;
use crate::{
app_state::{
AppState,
database::models::{DocumentId, VaultId, VaultUpdateId},
},
errors::{SyncServerError, not_found_error, server_error},
utils::normalize::normalize,
};
/// Path parameters destructured by the `fetch_document_version_content`
/// handler.
#[derive(Deserialize)]
pub struct FetchDocumentVersionContentPathParams {
/// Vault that holds the document; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
/// Document the requested version must belong to.
document_id: DocumentId,
/// Vault update ID identifying the requested version.
vault_update_id: VaultUpdateId,
}
/// Return the raw content bytes of one specific document version.
///
/// The version is looked up by its vault update ID. A not-found error is
/// returned when no such version exists or when the version found belongs to
/// a document other than `document_id`.
#[axum::debug_handler]
pub async fn fetch_document_version_content(
    Path(FetchDocumentVersionContentPathParams {
        vault_id,
        document_id,
        vault_update_id,
    }): Path<FetchDocumentVersionContentPathParams>,
    State(state): State<AppState>,
) -> Result<Bytes, SyncServerError> {
    let stored_version = state
        .database
        .get_document_version(&vault_id, vault_update_id, None)
        .await
        .map_err(server_error)?
        .ok_or_else(|| {
            not_found_error(anyhow!(
                "Document with vault update id `{vault_update_id}` not found",
            ))
        })?;

    // Update IDs are looked up per vault, so make sure the hit is a version
    // of the document named in the URL.
    if stored_version.document_id == document_id {
        Ok(stored_version.content.into())
    } else {
        Err(not_found_error(anyhow!(
            "Document with document id `{document_id}` does not have a version with id \
             `{vault_update_id}`",
        )))
    }
}

View file

@ -1,48 +0,0 @@
use anyhow::anyhow;
use axum::{
Json,
extract::{Path, State},
};
use serde::Deserialize;
use crate::{
app_state::{
AppState,
database::models::{DocumentId, DocumentVersion, VaultId},
},
errors::{SyncServerError, not_found_error, server_error},
utils::normalize::normalize,
};
/// Path parameters destructured by the `fetch_latest_document_version`
/// handler.
#[derive(Deserialize)]
pub struct FetchLatestDocumentVersionPathParams {
/// Vault that holds the document; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
/// Document whose latest version is requested.
document_id: DocumentId,
}
/// Return the latest stored version of a document, content included.
///
/// Responds with a not-found error when the vault has no version for
/// `document_id`.
#[axum::debug_handler]
pub async fn fetch_latest_document_version(
    Path(FetchLatestDocumentVersionPathParams {
        vault_id,
        document_id,
    }): Path<FetchLatestDocumentVersionPathParams>,
    State(state): State<AppState>,
) -> Result<Json<DocumentVersion>, SyncServerError> {
    let newest_version = state
        .database
        .get_latest_document(&vault_id, &document_id, None)
        .await
        .map_err(server_error)?;

    match newest_version {
        Some(version) => Ok(Json(version.into())),
        None => Err(not_found_error(anyhow!(
            "Document with id `{document_id}` not found",
        ))),
    }
}

View file

@ -1,56 +0,0 @@
use axum::{
Json,
extract::{Path, Query, State},
};
use serde::Deserialize;
use super::responses::FetchLatestDocumentsResponse;
use crate::{
app_state::{
AppState,
database::models::{VaultId, VaultUpdateId},
},
errors::{SyncServerError, server_error},
utils::normalize::normalize,
};
/// Path parameters destructured by the `fetch_latest_documents` handler.
#[derive(Deserialize)]
pub struct FetchLatestDocumentsPathParams {
/// Vault whose documents are listed; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
}
/// Query-string parameters accepted by the `fetch_latest_documents` handler.
#[derive(Deserialize)]
pub struct QueryParams {
/// When present, the handler queries `get_latest_documents_since` with this
/// update ID instead of listing all latest documents.
since_update_id: Option<VaultUpdateId>,
}
/// List the latest version of the documents in a vault (without content).
///
/// With `since_update_id` set, the listing comes from
/// `get_latest_documents_since`; otherwise from `get_latest_documents`.
/// `last_update_id` in the response is the highest update ID among the
/// returned documents, falling back to `since_update_id` and finally to `0`
/// when the listing is empty.
#[axum::debug_handler]
pub async fn fetch_latest_documents(
    Path(FetchLatestDocumentsPathParams { vault_id }): Path<FetchLatestDocumentsPathParams>,
    Query(QueryParams { since_update_id }): Query<QueryParams>,
    State(state): State<AppState>,
) -> Result<Json<FetchLatestDocumentsResponse>, SyncServerError> {
    let latest_documents = match since_update_id {
        Some(since) => state
            .database
            .get_latest_documents_since(&vault_id, since, None)
            .await
            .map_err(server_error),
        None => state
            .database
            .get_latest_documents(&vault_id, None)
            .await
            .map_err(server_error),
    }?;

    let last_update_id = latest_documents
        .iter()
        .map(|document| document.vault_update_id)
        .max()
        .or(since_update_id)
        .unwrap_or(0);

    Ok(Json(FetchLatestDocumentsResponse {
        last_update_id,
        latest_documents,
    }))
}

View file

@ -1,7 +0,0 @@
use axum::response::{Html, IntoResponse};
/// Serve the landing page, whose HTML is embedded into the binary at compile
/// time from `./assets/index.html`.
pub async fn index() -> impl IntoResponse {
    Html(include_str!("./assets/index.html"))
}

View file

@ -1,37 +0,0 @@
use axum::{
Json,
extract::{Path, State},
};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use serde::Deserialize;
use super::{auth::auth, responses::PingResponse};
use crate::{
app_state::{AppState, database::models::VaultId},
errors::SyncServerError,
utils::normalize::normalize,
};
/// Path parameters destructured by the `ping` handler.
#[derive(Deserialize)]
pub struct PingPathParams {
/// Vault the authentication check is performed against; deserialized
/// through the `normalize` helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
}
/// Report the server version and whether the caller is authenticated.
///
/// The `Authorization: Bearer` header is optional. `is_authenticated` is
/// `true` only when the header is present and `auth` accepts its token for
/// the given vault; a missing header or a rejected token yields `false`
/// rather than an error.
#[axum::debug_handler]
pub async fn ping(
    maybe_auth_header: Option<TypedHeader<Authorization<Bearer>>>,
    Path(PingPathParams { vault_id }): Path<PingPathParams>,
    State(state): State<AppState>,
) -> Result<Json<PingResponse>, SyncServerError> {
    let is_authenticated = match maybe_auth_header {
        Some(auth_header) => auth(&state, auth_header.token(), &vault_id).is_ok(),
        None => false,
    };

    let response = PingResponse {
        server_version: env!("CARGO_PKG_VERSION").to_owned(),
        is_authenticated,
    };

    Ok(Json(response))
}

View file

@ -1,39 +0,0 @@
use axum::body::Bytes;
use axum_typed_multipart::{FieldData, TryFromMultipart};
use serde::{self, Deserialize};
use ts_rs::TS;
use crate::app_state::database::models::{DocumentId, VaultUpdateId};
/// Multipart request body accepted by the `create_document` handler.
#[derive(TS, Debug, TryFromMultipart)]
#[ts(export)]
pub struct CreateDocumentVersion {
/// The client can decide the document id (if it wishes to) in order
/// to help with syncing. If the client does not provide a document id,
/// the server will generate one. If the client provides a document id
/// it must not already exist in the database.
pub document_id: Option<DocumentId>,
/// Path of the document as sent by the client; the handler passes it
/// through `sanitize_path` before storing it.
pub relative_path: String,
/// Raw document content. Exported to TypeScript as `Vec<u8>`; the multipart
/// field is declared with `limit = "unlimited"`.
#[ts(as = "Vec<u8>")]
#[form_data(limit = "unlimited")]
pub content: FieldData<Bytes>,
}
/// Multipart request body accepted by the `update_document` handler.
#[derive(TS, Debug, TryFromMultipart)]
#[ts(export)]
pub struct UpdateDocumentVersion {
/// Vault update ID of the version the client's edit is based on; the
/// handler loads this version and hands its content to `merge`.
pub parent_version_id: VaultUpdateId,
/// Path of the document as sent by the client; the handler passes it
/// through `sanitize_path` before using it.
pub relative_path: String,
/// Raw document content. Exported to TypeScript as `Vec<u8>`; the multipart
/// field is declared with `limit = "unlimited"`.
#[ts(as = "Vec<u8>")]
#[form_data(limit = "unlimited")]
pub content: FieldData<Bytes>,
}
/// JSON request body accepted by the `delete_document` handler. Field names
/// are camelCase on the wire (`relativePath`).
#[derive(TS, Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct DeleteDocumentVersion {
/// Path recorded on the deletion version; the handler passes it through
/// `sanitize_path` before storing it.
pub relative_path: String,
}

View file

@ -1,45 +0,0 @@
use serde::{self, Serialize};
use ts_rs::TS;
use crate::app_state::database::models::{
DocumentVersion, DocumentVersionWithoutContent, VaultUpdateId,
};
/// Response to a ping request. Serialized with camelCase field names.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct PingResponse {
/// Semantic version of the server.
pub server_version: String,
/// Whether the client is authenticated based on the sent Authorization
/// header. `false` when the header is missing or its token is rejected.
pub is_authenticated: bool,
}
/// Response to a fetch latest documents request. Serialized with camelCase
/// field names.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
pub struct FetchLatestDocumentsResponse {
/// Latest version of each returned document, without content.
pub latest_documents: Vec<DocumentVersionWithoutContent>,
/// The highest update ID among the documents in the response. When the
/// response contains no documents, the `fetch_latest_documents` handler
/// sets this to the request's `since_update_id`, or to `0` if that was not
/// given either.
pub last_update_id: VaultUpdateId,
}
/// Response to an update document request. Serialized as an internally tagged
/// enum whose `type` field holds the variant name.
#[derive(TS, Debug, Clone, Serialize)]
#[serde(tag = "type")]
#[ts(export)]
pub enum DocumentUpdateResponse {
/// Returned when the created/updated document's content is the same as was
/// sent in the create/update request and thus the response doesn't contain
/// the content because the client must already have it.
///
/// The `update_document` handler also returns this variant, carrying the
/// existing latest version, when that version is deleted or already matches
/// the request's content and path.
FastForwardUpdate(DocumentVersionWithoutContent),
/// Returned when the created/updated document's content is different from
/// what was sent in the create/update request.
MergingUpdate(DocumentVersion),
}

View file

@ -1,179 +0,0 @@
use anyhow::{Context as _, anyhow};
use axum::{
Extension, Json,
extract::{Path, State},
};
use axum_extra::TypedHeader;
use axum_typed_multipart::TypedMultipart;
use log::info;
use serde::Deserialize;
use sync_lib::{is_file_type_mergable, merge};
use super::{
device_id_header::DeviceIdHeader, requests::UpdateDocumentVersion,
responses::DocumentUpdateResponse,
};
use crate::{
app_state::{
AppState,
database::models::{DocumentId, StoredDocumentVersion, VaultId},
},
config::user_config::User,
errors::{SyncServerError, not_found_error, server_error},
utils::{dedup_paths::dedup_paths, normalize::normalize, sanitize_path::sanitize_path},
};
/// Path parameters destructured by the `update_document` handler.
#[derive(Deserialize)]
pub struct UpdateDocumentPathParams {
/// Vault that holds the document; deserialized through the `normalize`
/// helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
/// Identifier of the document to update.
document_id: DocumentId,
}
#[axum::debug_handler]
#[allow(clippy::too_many_lines)]
pub async fn update_document(
Path(UpdateDocumentPathParams {
vault_id,
document_id,
}): Path<UpdateDocumentPathParams>,
Extension(user): Extension<User>,
TypedHeader(device_id): TypedHeader<DeviceIdHeader>,
State(state): State<AppState>,
TypedMultipart(request): TypedMultipart<UpdateDocumentVersion>,
) -> Result<Json<DocumentUpdateResponse>, SyncServerError> {
// No need for a transaction as document versions are immutable
let parent_document = state
.database
.get_document_version(&vault_id, request.parent_version_id, None)
.await
.map_err(server_error)?
.map_or_else(
|| {
Err(not_found_error(anyhow!(
"Parent version with id `{}` not found",
request.parent_version_id
)))
},
Ok,
)?;
let sanitized_relative_path = sanitize_path(&request.relative_path);
let mut transaction = state
.database
.create_write_transaction(&vault_id)
.await
.map_err(server_error)?;
let last_update_id = state
.database
.get_max_update_id_in_vault(&vault_id, Some(&mut transaction))
.await
.map_err(server_error)?;
let latest_version = state
.database
.get_latest_document(&vault_id, &document_id, Some(&mut transaction))
.await
.map_err(server_error)?
.map_or_else(
|| {
Err(not_found_error(anyhow!(
"Document with id `{document_id}` not found",
)))
},
Ok,
)?;
if latest_version.is_deleted {
transaction
.rollback()
.await
.context("Failed to roll back transaction")
.map_err(server_error)?;
return Ok(Json(DocumentUpdateResponse::FastForwardUpdate(
latest_version.into(),
)));
}
let content = request.content.contents.to_vec();
// Return the latest version if the content and path are the same as the latest
// version
if content == latest_version.content && sanitized_relative_path == latest_version.relative_path
{
info!("Document content is the same as the latest version, skipping update");
transaction
.rollback()
.await
.context("Failed to roll back transaction")
.map_err(server_error)?;
return Ok(Json(DocumentUpdateResponse::FastForwardUpdate(
latest_version.into(),
)));
}
let merged_content = if is_file_type_mergable(&sanitized_relative_path) {
merge(&parent_document.content, &latest_version.content, &content)
} else {
content.clone()
};
let is_different_from_request_content = merged_content != content;
// We can only update the relative path if we're the first one to do so
let new_relative_path = if parent_document.relative_path == latest_version.relative_path
&& latest_version.relative_path != sanitized_relative_path
{
let mut new_relative_path = String::default();
for candidate in dedup_paths(&sanitized_relative_path) {
if state
.database
.get_latest_document_by_path(&vault_id, &candidate, Some(&mut transaction))
.await
.map_err(server_error)?
.is_none()
{
new_relative_path = candidate;
break;
}
}
new_relative_path
} else {
latest_version.relative_path.clone()
};
let new_version = StoredDocumentVersion {
document_id,
vault_update_id: last_update_id + 1,
relative_path: new_relative_path,
content: merged_content,
updated_date: chrono::Utc::now(),
is_deleted: false,
user_id: user.name,
device_id: device_id.0,
};
state
.database
.insert_document_version(&vault_id, &new_version, Some(&mut transaction))
.await
.map_err(server_error)?;
transaction
.commit()
.await
.context("Failed to commit successful transaction")
.map_err(server_error)?;
Ok(Json(if is_different_from_request_content {
DocumentUpdateResponse::MergingUpdate(new_version.into())
} else {
DocumentUpdateResponse::FastForwardUpdate(new_version.into())
}))
}

View file

@ -1,181 +0,0 @@
use anyhow::Context;
use axum::{
extract::{
Path, State,
ws::{Message, WebSocket, WebSocketUpgrade},
},
response::Response,
};
use futures::stream::StreamExt;
use log::{debug, info};
use serde::Deserialize;
use crate::{
app_state::{
AppState,
database::models::VaultId,
websocket::{
models::{
CursorPositionFromServer, WebSocketClientMessage, WebSocketServerMessage,
WebSocketVaultUpdate,
},
utils::{
get_authenticated_handshake, get_unseen_documents, send_update_over_websocket,
},
},
},
errors::{SyncServerError, client_error, server_error},
utils::normalize::normalize,
};
/// Path parameters destructured by the `websocket_handler` handler.
#[derive(Deserialize)]
pub struct WebSocketPathParams {
/// Vault the WebSocket session is opened on; deserialized through the
/// `normalize` helper.
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
}
/// Accept a WebSocket upgrade request for a vault and hand the upgraded
/// socket to `websocket_wrapped`. No authentication happens at this point.
pub async fn websocket_handler(
    ws: WebSocketUpgrade,
    Path(WebSocketPathParams { vault_id }): Path<WebSocketPathParams>,
    State(state): State<AppState>,
) -> Result<Response, SyncServerError> {
    let upgrade_response =
        ws.on_upgrade(move |socket| websocket_wrapped(state, socket, vault_id));

    Ok(upgrade_response)
}
/// Run a WebSocket session to completion, logging its start at info level and
/// any error it ends with at debug level. Errors are not propagated because
/// there is no caller left to report them to once the upgrade has happened.
async fn websocket_wrapped(state: AppState, stream: WebSocket, vault_id: VaultId) {
    info!("WebSocket connection opened on vault '{vault_id}'");

    if let Err(err) = websocket(state, stream, vault_id.clone()).await {
        debug!("WebSocket connection error on vault '{vault_id}': {err}");
    }
}
/// Drive one WebSocket session on `vault_id` from handshake to disconnect.
///
/// 1. The first frame is handed to `get_authenticated_handshake`; the session
///    ends with its error if the handshake is rejected.
/// 2. The client receives an initial `VaultUpdate` with the documents it has
///    not seen yet, followed by the current cursor positions.
/// 3. Two tasks then run concurrently: one forwards vault broadcasts to the
///    client (skipping those that originate from the client's own device),
///    the other applies cursor updates received from the client.
/// 4. As soon as either task ends, the other one is aborted, the device's
///    cursors are removed, and the finished task's result is returned.
///
/// # Errors
///
/// Returns the handshake error, any error from sending the initial messages,
/// or the error (including a panic/join failure) of whichever task ended the
/// session.
#[allow(clippy::too_many_lines)]
async fn websocket(
    state: AppState,
    stream: WebSocket,
    vault_id: VaultId,
) -> Result<(), SyncServerError> {
    let (mut sender, mut websocket_receiver) = stream.split();

    // A missing first frame or a transport error is passed on as the default
    // (`None`) and left for the handshake helper to reject.
    let authed_handshake = get_authenticated_handshake(
        &state,
        &vault_id,
        websocket_receiver
            .next()
            .await
            .transpose()
            .unwrap_or_default(),
    )?;

    info!(
        "WebSocket handshake successful for vault '{vault_id}' for '{}'",
        authed_handshake.handshake.device_id
    );

    // Subscribed before the initial snapshot is taken below.
    let mut broadcast_receiver = state.broadcasts.get_receiver(vault_id.clone()).await;

    send_update_over_websocket(
        &WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate {
            documents: get_unseen_documents(
                &state,
                &vault_id,
                authed_handshake.handshake.last_seen_vault_update_id,
            )
            .await?,
            is_initial_sync: true,
        }),
        &mut sender,
    )
    .await?;

    send_update_over_websocket(
        &WebSocketServerMessage::CursorPositions(CursorPositionFromServer {
            clients: state.cursors.get_cursors(&vault_id).await,
        }),
        &mut sender,
    )
    .await?;

    // Server -> client: forward broadcasts until the channel reports an error
    // or sending fails.
    let device_id = authed_handshake.handshake.device_id.clone();
    let mut send_task = tokio::spawn(async move {
        while let Ok(update) = broadcast_receiver.recv().await {
            // Don't echo a device's own update back to it.
            if Some(&device_id) == update.origin_device_id.as_ref() {
                continue;
            }

            send_update_over_websocket(&update.message, &mut sender).await?;
        }

        Ok::<(), SyncServerError>(())
    });

    // Client -> server: the loop ends on the first frame that is not a
    // successfully received text frame (including the end of the stream).
    let device_id = authed_handshake.handshake.device_id.clone();
    let vault_id_clone = vault_id.clone();
    let cursor_manager = state.cursors.clone();
    let mut receive_task = tokio::spawn(async move {
        while let Some(Ok(Message::Text(message))) = websocket_receiver.next().await {
            let message: WebSocketClientMessage = serde_json::from_str(&message)
                .context("Failed to parse WebSocket message from client")
                .map_err(server_error)?;

            match message {
                WebSocketClientMessage::Handshake(_) => {
                    return Err(client_error(anyhow::anyhow!(
                        "Unexpected handshake message"
                    )));
                }
                WebSocketClientMessage::CursorPositions(cursors) => {
                    cursor_manager
                        .update_cursors(
                            vault_id_clone.clone(),
                            authed_handshake.user.name.clone(),
                            &device_id,
                            cursors.document_to_cursors,
                        )
                        .await;
                }
            }
        }

        Ok::<(), SyncServerError>(())
    });

    // Take the result of whichever task finishes first directly from
    // `select!`. A `JoinHandle` that has already yielded its output must not
    // be awaited again (tokio panics with "JoinHandle polled after
    // completion"), so only the *other*, aborted handle is awaited afterwards.
    let result: Result<(), SyncServerError> = tokio::select! {
        send_outcome = &mut send_task => {
            receive_task.abort();
            // Wait until the aborted task has actually stopped so it cannot
            // touch the cursors after they are removed below; the
            // cancellation error it yields is expected.
            let _ = receive_task.await;

            send_outcome
                .context("WebSocket send task failed")
                .map_err(client_error)
                .and_then(|task_result| task_result)
        }
        receive_outcome = &mut receive_task => {
            send_task.abort();
            // The cancellation error of the aborted task is expected.
            let _ = send_task.await;

            receive_outcome
                .context("WebSocket receive task failed")
                .map_err(client_error)
                .and_then(|task_result| task_result)
        }
    };

    state
        .cursors
        .remove_cursors_of_device(&vault_id, &authed_handshake.handshake.device_id)
        .await;

    // Reaching this point always means the session is over. A clean
    // disconnect now yields `Ok(())`, so the log line is emitted
    // unconditionally rather than only on error.
    info!(
        "WebSocket disconnected on vault '{vault_id}' for '{}'",
        authed_handshake.handshake.device_id
    );

    result
}

Some files were not shown because too many files have changed in this diff Show more