Compare commits

..

23 commits

Author SHA1 Message Date
ea76c5ecc6
Add editorconfig 2025-04-02 22:06:29 +01:00
b51cf5c04e
Merge branch 'main' into asch/cursors 2025-04-02 21:32:36 +01:00
afee6eb680
Merge branch 'main' of https://github.com/schmelczer/obsidian-shared-sync 2025-04-02 21:32:20 +01:00
31a81921a1
Move cursor after file updates 2025-04-02 21:32:08 +01:00
5deb10ab8b
Add cursor position conversions 2025-04-02 21:29:48 +01:00
a2c51a9d5d
Fix tests 2025-04-02 21:27:36 +01:00
40323c33ee
Make JS API usable 2025-04-02 21:07:33 +01:00
565372b526
Enable tests 2025-04-02 21:07:14 +01:00
426e9f07b7
Fix compile error 2025-04-02 20:37:44 +01:00
5ed7e0ad3a
Move tests 2025-04-02 20:37:36 +01:00
38a8e6b774
Fix web tests 2025-04-01 22:53:50 +01:00
89b87efa59
Fix tests 2025-04-01 22:50:55 +01:00
680486c4b5
Implement cursor merging 2025-04-01 22:49:02 +01:00
941100d715
Lint 2025-04-01 22:46:03 +01:00
688fd2f1b1
Update API and add cursor position tests 2025-04-01 22:45:26 +01:00
168fb44b07
Fix double document creation on first sync 2025-04-01 22:42:14 +01:00
3744b0a633
Add apply_merge_context to cursor 2025-04-01 22:41:52 +01:00
998ee387d2
Add integration tests 2025-04-01 22:40:05 +01:00
23c288b1eb
Clean up 2025-04-01 20:53:38 +01:00
e9e2328f03
Expose merge_text_with_cursors on API 2025-04-01 20:10:03 +01:00
71ccd7b61d
Make Side printable 2025-03-30 21:27:04 +01:00
c9c0ffecf1
Add cursor types 2025-03-30 21:26:04 +01:00
23ba0d2c82
Add comments 2025-03-30 12:24:08 +01:00
462 changed files with 55345 additions and 39383 deletions

View file

@ -9,8 +9,3 @@ trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4
tab_width = 4
[*.{yml,yaml,md}]
indent_size = 2
tab_width = 2

View file

@ -1,35 +0,0 @@
name: Check
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
workflow_dispatch:
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-Dwarnings"
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node.js environment
uses: actions/setup-node@v4
with:
node-version: "25.x"
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: "1.92.0"
components: clippy, rustfmt
- name: Lint & test
run: scripts/check.sh

View file

@ -1,38 +0,0 @@
name: Deploy Documentation
on:
push:
branches:
- main
paths:
- "docs/**"
- ".forgejo/workflows/deploy-docs.yml"
workflow_dispatch:
concurrency:
group: pages
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node.js environment
uses: actions/setup-node@v4
with:
node-version: "25.x"
- name: Build docs
run: scripts/build-docs.sh
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: docs
path: docs/.vitepress/dist

View file

@ -1,71 +0,0 @@
name: E2E tests
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
schedule:
- cron: "0 * * * *"
workflow_dispatch:
concurrency:
group: e2e-tests
cancel-in-progress: false
env:
RUSTFLAGS: "-Dwarnings"
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node.js environment
uses: actions/setup-node@v4
with:
node-version: "25.x"
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: "1.92.0"
components: clippy, rustfmt
- name: Setup rust
run: |
which sqlx || cargo install sqlx-cli
cd sync-server
sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source src/app_state/database/migrations --database-url sqlite://db.sqlite3
- name: E2E tests
run: |
cd sync-server
cargo run config-e2e.yml --color never &
SERVER_PID=$!
cd ..
scripts/e2e.sh 8
EXIT_CODE=$?
kill $SERVER_PID 2>/dev/null || true
wait $SERVER_PID 2>/dev/null || true
exit $EXIT_CODE
- name: Upload e2e logs
if: always()
uses: actions/upload-artifact@v4
with:
name: e2e-logs
path: logs/
retention-days: 30
- name: Cleanup
if: always()
run: scripts/clean-up.sh

View file

@ -1,51 +0,0 @@
name: Publish CLI
on:
push:
branches: ["main"]
tags: ["*"]
pull_request:
branches: ["main"]
jobs:
publish-docker:
runs-on: ubuntu-docker
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Extract registry hostname
id: registry
run: echo "host=$(echo '${{ github.server_url }}' | sed 's|https\?://||')" >> "$GITHUB_OUTPUT"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log into container registry
uses: docker/login-action@v3
with:
registry: ${{ steps.registry.outputs.host }}
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ steps.registry.outputs.host }}/${{ github.repository }}-cli
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
with:
context: frontend
file: frontend/local-client-cli/Dockerfile
platforms: linux/amd64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ steps.registry.outputs.host }}/${{ github.repository }}-cli:buildcache
cache-to: type=registry,ref=${{ steps.registry.outputs.host }}/${{ github.repository }}-cli:buildcache,mode=max

View file

@ -1,71 +0,0 @@
name: Publish Obsidian plugin
on:
push:
tags: ["*"]
env:
CARGO_TERM_COLOR: always
jobs:
publish-plugin:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node.js environment
uses: actions/setup-node@v4
with:
node-version: "25.x"
- name: Build plugin
run: |
cd frontend
npm ci
npm run build
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: "1.92.0"
components: clippy, rustfmt
- name: Install cross-compilation tools
run: |
apt update
apt install -y gcc-aarch64-linux-gnu musl-tools gcc-mingw-w64-x86-64 jq
- name: Build Linux and Windows binaries
run: ./scripts/build-sync-server-binaries.sh
- name: Create release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SERVER_URL: ${{ github.server_url }}
REPO: ${{ github.repository }}
run: |
tag="${GITHUB_REF#refs/tags/}"
mkdir -p release
cp frontend/obsidian-plugin/dist/* release/
cp sync-server/artifacts/sync-server-* release/
# Create draft release via Forgejo API
RELEASE_ID=$(curl -s -X POST \
"${SERVER_URL}/api/v1/repos/${REPO}/releases" \
-H "Authorization: token ${GITHUB_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${tag}\", \"name\": \"${tag}\", \"draft\": true}" \
| jq -r '.id')
# Upload release assets
for file in release/*; do
filename=$(basename "$file")
curl -s -X POST \
"${SERVER_URL}/api/v1/repos/${REPO}/releases/${RELEASE_ID}/assets?name=${filename}" \
-H "Authorization: token ${GITHUB_TOKEN}" \
-F "attachment=@${file}"
done

View file

@ -1,51 +0,0 @@
name: Publish server Docker image
on:
push:
branches: ["main"]
tags: ["*"]
pull_request:
branches: ["main"]
jobs:
publish-docker:
runs-on: ubuntu-docker
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Extract registry hostname
id: registry
run: echo "host=$(echo '${{ github.server_url }}' | sed 's|https\?://||')" >> "$GITHUB_OUTPUT"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log into container registry
if: github.ref_type == 'tag'
uses: docker/login-action@v3
with:
registry: ${{ steps.registry.outputs.host }}
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ steps.registry.outputs.host }}/${{ github.repository }}
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v5
with:
context: sync-server
platforms: linux/amd64,linux/arm64
push: ${{ github.ref_type == 'tag' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ steps.registry.outputs.host }}/${{ github.repository }}:buildcache
cache-to: type=registry,ref=${{ steps.registry.outputs.host }}/${{ github.repository }}:buildcache,mode=max

27
.github/dependabot.yml vendored Normal file
View file

@ -0,0 +1,27 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "npm"
directories: ["**"]
schedule:
interval: "daily"
- package-ecosystem: "docker"
directories: ["**"]
schedule:
interval: "daily"
- package-ecosystem: "cargo"
directories: ["**"]
schedule:
interval: "daily"
# Disable this for security reasons
# - package-ecosystem: "github-actions"
# directories: ["**"]
# schedule:
# interval: "daily"

66
.github/workflows/check.yml vendored Normal file
View file

@ -0,0 +1,66 @@
name: Check
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-Dwarnings"
jobs:
build:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- name: Setup Node.js environment
uses: actions/setup-node@v4.2.0
with:
node-version: "22.x"
check-latest: true
- name: Setup rust
run: |
cargo install sqlx-cli wasm-pack
cd backend
sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
- name: Build wasm
run: |
cd backend
wasm-pack build --target web sync_lib
- name: Lint backend
run: |
cd backend
cargo clippy --all-targets --all-features
cargo fmt --all -- --check
- name: Test backend
run: |
cd backend
cargo test --verbose
cd sync_lib
wasm-pack test --node
- name: Lint frontend
run: |
cd frontend
npm ci
npm run build
npm run lint
if [[ $(git status --porcelain) ]]; then
git status --porcelain
echo "Failing CI because the working directory is not clean after linting"
exit 1
fi
- name: Test frontend
run: |
cd frontend
npm run test

56
.github/workflows/e2e.yml vendored Normal file
View file

@ -0,0 +1,56 @@
name: E2E tests
on:
push:
branches: ["main"]
pull_request:
branches: ["main"]
env:
CARGO_TERM_COLOR: always
RUSTFLAGS: "-Dwarnings"
jobs:
build:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- name: Setup Node.js environment
uses: actions/setup-node@v4.2.0
with:
node-version: "22.x"
check-latest: true
- name: Setup rust
run: |
cargo install sqlx-cli wasm-pack
cd backend
sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
- name: Build wasm
run: |
cd backend
wasm-pack build --target web sync_lib
- name: E2E tests
run: |
cd backend
cargo run -p sync_server config-e2e.yml --color never &
cd ..
scripts/update-api-types.sh
cd frontend
npm ci
npm run build
npm run lint
if [[ $(git status --porcelain) ]]; then
git status --porcelain
echo "Failing CI because the working directory is not clean after updating the API types"
exit 1
fi
cd ..
scripts/e2e.sh 32

89
.github/workflows/publish-docker.yml vendored Normal file
View file

@ -0,0 +1,89 @@
name: Publish server Docker image
on:
push:
branches: ["main"]
tags: ["*"]
pull_request:
branches: ["main"]
env:
# Use docker.io for Docker Hub if empty
REGISTRY: ghcr.io
# github.repository as <account>/<repo>
IMAGE_NAME: ${{ github.repository }}
jobs:
publish-docker:
runs-on: self-hosted
permissions:
contents: read
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio.
id-token: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
# Install the cosign tool
# https://github.com/sigstore/cosign-installer
- name: Install cosign
if: github.ref_type == 'tag'
uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0
with:
cosign-release: "v2.2.4"
# Set up BuildKit Docker container builder to be able to build
# multi-platform images and export cache
# https://github.com/docker/setup-buildx-action
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
# Login against a Docker registry
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.ref_type == 'tag'
uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Extract metadata (tags, labels) for Docker
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# Build and push Docker image with Buildx
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
with:
context: backend
push: ${{ github.ref_type == 'tag' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
# Sign the resulting Docker image digest.
# This will only write to the public Rekor transparency log when the Docker
# repository is public to avoid leaking data. If you would like to publish
# transparency data even for private images, pass --force to cosign below.
# https://github.com/sigstore/cosign
- name: Sign the published Docker image
if: ${{ github.ref_type == 'tag' }}
env:
# https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
TAGS: ${{ steps.meta.outputs.tags }}
DIGEST: ${{ steps.build-and-push.outputs.digest }}
# This step uses the identity token to provision an ephemeral certificate
# against the sigstore community Fulcio instance.
run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}

46
.github/workflows/publish-plugin.yml vendored Normal file
View file

@ -0,0 +1,46 @@
name: Publish Obsidian plugin
on:
push:
tags: ["*"]
env:
CARGO_TERM_COLOR: always
jobs:
publish-plugin:
runs-on: self-hosted
steps:
- uses: actions/checkout@v4
- name: Setup Node.js environment
uses: actions/setup-node@v4.2.0
with:
node-version: "22.x"
check-latest: true
- name: Build wasm
run: |
cd backend
cargo install wasm-pack
wasm-pack build --target web sync_lib
- name: Build plugin
run: |
cd frontend
npm ci
npm run build
- name: Create release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
tag="${GITHUB_REF#refs/tags/}"
cd frontend/obsidian-plugin/dist
gh release create "$tag" \
--title="$tag" \
--draft \
main.js manifest.json styles.css

41
.gitignore vendored
View file

@ -1,24 +1,17 @@
# npm
node_modules
# Exclude macOS Finder (System Explorer) View States
.DS_Store
# Frontend build folders
frontend/*/dist
# Rust build folders
sync-server/target
sync-server/artifacts
sync-server/bindings/*.ts
# build folders
sync-server/db.sqlite3*
**/databases
*.log
*.sqlx
target
.task
# npm
node_modules
# Exclude macOS Finder (System Explorer) View States
.DS_Store
# Rust build folder
backend/target
frontend/*/dist
backend/db.sqlite3*
backend/databases
*.log
*.sqlx

View file

@ -3,8 +3,6 @@
"jest.rootPath": "plugin",
"files.exclude": {
"**/dist": true,
"**/node_modules": true,
"**/.sqlx": true,
"**/target": true
"**/node_modules": true
}
}
}

155
CLAUDE.md
View file

@ -1,155 +0,0 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project shape
VaultLink is a self-hosted Obsidian file-sync system. Two halves of one repo:
- `sync-server/` — Rust (axum + sqlx/SQLite). Source of truth for vault state, broadcasts changes via WebSocket.
- `frontend/` — npm workspaces. The sync engine (`sync-client`) is consumed by an Obsidian plugin, a standalone CLI, a fuzz E2E harness, a scripted determinism harness, and a history UI.
The HTTP/WS API types are generated from Rust (`ts-rs`) and mirrored into the TS workspaces. **Never hand-edit files in `frontend/sync-client/src/services/types/` or `frontend/history-ui/src/lib/types/`** — run `scripts/update-api-types.sh` after changing anything Serde-derived in the server.
### Frontend workspaces
- `sync-client` — the sync engine; published to consumers via `dist/`. All other TS workspaces depend on it via `file:../sync-client`.
- `obsidian-plugin` — Obsidian plugin built from `sync-client`.
- `local-client-cli` — same engine wrapped as a standalone CLI.
- `history-ui` — vault-history web UI.
- `test-client` — fuzz E2E harness (random ops across N processes).
- `deterministic-tests` — scripted multi-client tests with an in-memory FS, run against a real server.
## Common commands
Pre-push hygiene (formats, lints, runs tests, requires clean git state):
```sh
scripts/check.sh --fix
```
Run the fuzz E2E (N parallel processes):
```sh
scripts/e2e.sh 12
# Logs land in logs/log_<i>.log. Clean with scripts/clean-up.sh
```
Run deterministic tests (require a release-built server in `sync-server/target/release/sync_server` — they spawn it themselves):
```sh
cd sync-server && cargo build --release && cd ..
cd frontend
npm run build -w sync-client -w deterministic-tests
node deterministic-tests/dist/cli.js # all
node deterministic-tests/dist/cli.js --filter=rename # subset
node deterministic-tests/dist/cli.js --filter=… -j 4 # cap parallelism
```
Run a single sync-client unit test by file:
```sh
cd frontend/sync-client && npx tsx --test 'src/**/sync-event-queue.test.ts'
```
Server: dev runs from `sync-server/` against `config-e2e.yml`:
```sh
cd sync-server
cargo run config-e2e.yml # dev
cargo build --release # used by both e2e harnesses
cargo test # unit + ts-rs binding export tests
```
Frontend dev (sync-client + obsidian-plugin watch in parallel):
```sh
cd frontend && npm install && npm run dev
```
Regenerate TS bindings from Rust types (touches `frontend/{sync-client,history-ui}/src/.../types/`):
```sh
scripts/update-api-types.sh
```
## SQLite / sqlx
The server uses `sqlx::query!` macros that need a prepared `.sqlx` cache to compile offline. Touching any SQL means regenerating it:
```sh
cd sync-server
sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source src/app_state/database/migrations --database-url sqlite://db.sqlite3
cargo sqlx prepare --workspace
```
New migrations: `sqlx migrate add --source src/app_state/database/migrations <name>`.
## Sync engine architecture
Read `frontend/sync-client/src/sync-operations/` to follow the sync engine; the rest of `sync-client` is plumbing (filesystem ops, persistence, services, telemetry).
The engine is **two independent loops with separate invariants**:
- **Wire loop** (`syncer.ts`) — drains the single-consumer FIFO queue. HTTP and WS handlers update record fields (`remoteRelativePath`, `parentVersionId`, `remoteHash`) and write content to the file at `record.localPath`. They never move files for path placement.
- **Path reconciler** (`reconciler.ts`) — runs after every drained event. Best-effort pass that moves files to make `localPath === remoteRelativePath`. The move graph is topologically sorted; cycles are resolved by reading every file in the cycle into memory and writing each back to its new slot (no tmp files). Records with pending local events are skipped on each pass — the reconciler operates only on settled records. Failures (slot occupied by an untracked file, etc.) are silent skips; the next pass retries.
**`SyncEventQueue`** (`sync-event-queue.ts`) holds:
- `byDocId: Map<DocumentId, DocumentRecord>` — primary record store.
- `byLocalPath: Map<RelativePath, DocumentRecord>` — derived index for path lookups, maintained at every mutation point.
- `events: SyncEvent[]` — pending wire ops in FIFO drain order.
```ts
DocumentRecord = {
documentId,
parentVersionId,
remoteHash?,
remoteRelativePath,
localPath: RelativePath | undefined
}
```
`localPath === undefined` means the doc has no local file yet — typically a remote create whose target slot was occupied at receive time; the reconciler will fetch and place when the slot frees (the bytes wait in `pendingPlacementContent`).
Local FS events from the watcher update `localPath` synchronously at enqueue time via `setLocalPath` / `upsertRecord`. The wire loop never updates it for path placement; only the reconciler does. A user rename onto a tracked slot enqueues a `LocalDelete` for the displaced doc (the OS rename clobbered its content) and clears that doc's `localPath`.
**Pending creates** use a `Promise<DocumentId>` chain to serialize dependent ops (`LocalUpdate`, `LocalDelete`) behind the still-in-flight `LocalCreate`. `resolveCreate` resolves the promise once the server returns a docId, and `replacePendingDocumentId` swaps the resolved id across already-queued events. `findLatestCreateForPath` is the lookup the watcher uses to attach dependents; `updatePendingCreatePath` rewrites a pending create's `event.path` in place when the user renames the file before its create has acked.
**Watermark.** `lastSeenUpdateId` uses a `MinCovered` (a contiguous-prefix tracker over a stream of integers): we only advance the published min when the next consecutive id has been processed, so out-of-order RemoteChange ids don't fool the WebSocket handshake into requesting a too-recent catch-up.
**Server catch-up.** The server's WS handshake replays events newer than the client's `last_seen_vault_update_id` from the `latest_document_versions` view (one row per doc, the latest). On those replayed rows `is_new_file` means _new to this client_ (`creation_vault_update_id > last_seen_vault_update_id`), not "this row is the doc's first version" — necessary because the catch-up only carries the latest version; if a doc was created and updated past the watermark, the client never sees its create otherwise.
## Edge-case patterns the sync engine has to survive
The two-loop split defuses most of the old race catalogue (slot-collision stashes, conflict-uuid divergence, `MoveOnConflict.NEW`/`EXISTING` policy choices) by separating wire transport from path placement. What's left:
**Pending-create docId is a `Promise`, not a string, until the create acks.** Any `LocalUpdate` / `LocalDelete` queued behind a still-in-flight `LocalCreate` carries the create's `resolvers.promise` as its `documentId`. `replacePendingDocumentId` swaps the resolved id across queued events when the create resolves; `===` comparisons against the resolved string elsewhere will silently fail until that swap runs. Anything that walks `events[]` looking for a docId match must either run after the swap or be tolerant of `Promise`-typed ids.
**`processCreate` reads `event.path` live, not `event.originalPath`.** The watcher rewrites `event.path` in place via `updatePendingCreatePath` when the user renames a pending-create file. `originalPath` was removed from `LocalCreate` events specifically because reading it would send the stale pre-rename path to the server.
**`record.localPath` mutates in place across awaits.** When the watcher renames a doc while a drain handler is awaiting an HTTP roundtrip, the queue mutates the in-flight event's record so subsequent reads see the new path. Snapshotting `record.localPath` into a local at function entry and using it after an `await` reads/writes a now-vacated slot. Read `record.localPath` live; only snapshot for the deliberate "did it change while I was awaiting" comparison.
**Reconciler-defer is the wire-loop's contract with the reconciler.** The reconciler skips records where `hasPendingLocalEventsForDocumentId` returns true. Wire-loop handlers can therefore freely write `remoteRelativePath` to whatever the server returned — even if it disagrees with `localPath` — knowing the reconciler won't move the file out from under a queued user rename.
**Watermark advancement is load-bearing both ways.** Branches that _skip_ a remote event without advancing `lastSeenUpdateId` create permanent gaps that re-deliver forever. Branches that _advance_ without applying the content lose data: the server has no further event to re-deliver, the catch-up only carries the latest version, and any state in between is gone. Don't advance unless the event was actually applied (or deliberately discarded after weighing both halves).
**`isNewFile` semantics differ between catch-up and real-time.** On WS handshake replay it means _new to this client_ (`creation_vault_update_id > last_seen_vault_update_id`); on real-time broadcasts it means _this version is the create_ (`creation_vault_update_id == vault_update_id`). A handler that decides based on one interpretation will be wrong on the other channel; reasoning about fetch-and-treat-as-new vs. ignore needs to know which channel delivered the event.
**Pause / disable-sync mid-flight** is the one race the new model doesn't structurally fix. An HTTP that committed server-side but whose response was discarded leaves the server holding a doc the client has no record of. Resume → offline scan → server-side dedupe handles it (the server merges the duplicate create into the existing doc), but if the merge produces a deconflict, the client picks up an extra file. Out of scope for the two-loop split.
**Cycle reconciliation uses in-memory content swap.** When the move graph contains a cycle, the reconciler reads every file in the cycle into memory and writes each back to its new slot, with no tmp files. A write-ahead marker at `.vaultlink/swap-<uuid>.json` lists each leg; on startup the reconciler reads the marker, hashes each `from` to determine which legs ran, and replays the rest. The `.vaultlink/**` glob is hard-coded as an internal ignore pattern so swap markers don't get sync'd.
## Two complementary E2E harnesses
- **`test-client` (fuzz):** random ops across N parallel processes for many minutes. Used by `scripts/e2e.sh`. Catches bugs nobody thought to write a test for, but reproductions are noisy.
- **`deterministic-tests`:** scripted scenarios with an in-memory FS pinned to a real server. Used to _capture_ a fuzz-discovered bug as a minimal repro before fixing it. See `frontend/deterministic-tests/README.md` for the step grammar (`pause-server`, `pause-websocket`, `barrier`, `assert-consistent`, etc.).
When a fuzz failure surfaces, the workflow is: root-cause from logs → write a deterministic test that fails on the bug → fix → confirm both the deterministic test and `e2e.sh` pass.
## Style
- TS: 4-space indent, no tabs, LF, prettier (`trailingComma: "none"`). YAML/MD use 2-space indent.
- Rust: `rustfmt.toml` enforces 4-space spaces, LF.
- Lint: ESLint for TS, Clippy for Rust, `cargo machete` for unused deps. All wired into `scripts/check.sh`.

View file

@ -2,24 +2,25 @@
[![Check](https://github.com/schmelczer/vault-link/actions/workflows/check.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/check.yml)
[![E2E tests](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/e2e.yml)
[![Publish server Docker image](https://github.com/schmelczer/vault-link/actions/workflows/publish-server-docker.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-server-docker.yml)
[![Publish CLI](https://github.com/schmelczer/vault-link/actions/workflows/publish-cli-docker.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-cli-docker.yml)
[![Publish server Docker image](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-docker.yml)
[![Publish Obsidian plugin](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml/badge.svg)](https://github.com/schmelczer/vault-link/actions/workflows/publish-plugin.yml)
## Develop
### Set up Node.JS 25 with [nvm](https://github.com/nvm-sh/nvm)
### Install [nvm](https://github.com/nvm-sh/nvm)
- `curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash`
- `nvm install 25`
- `nvm use 25`
- Optionally, set the system-wide default: `nvm alias default 25`
- `nvm install 22`
- `nvm use 22`
- Optionally set the system-wide default: `nvm alias default 22`
### Set up Rust
- Install [`rustup`](https://rustup.rs): `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`
- Install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/installer): `curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh`
- `cargo install cargo-insta sqlx-cli`
- `cargo install cargo-insta sqlx-cli cargo-edit`
### Install Obsidian on Linux
@ -30,50 +31,25 @@ flatpak install flathub md.obsidian.Obsidian
flatpak run md.obsidian.Obsidian
```
#### Run in development mode
Start the server:
```sh
cargo install sqlx-cli
cd sync-server
cargo run config-e2e.yml
```
```sh
cd frontend
npm install
npm run dev
```
### Scripts
#### Before pushing
```sh
scripts/check.sh --fix
```
#### Update HTTP API TS bindings
```sh
```sh
scripts/update-api-types.sh
```
#### Publish new version
#### Publish new version
```sh
scripts/bump-version.sh patch
```
#### Run E2E tests
```sh
scripts/e2e.sh 8
```sh
scripts/e2e.sh
```
And to clean up the logs & database files, run `scripts/clean-up.sh`
## Projects
- [Sync server](./sync-server/README.md)

View file

@ -2,4 +2,5 @@ target
Dockerfile
.dockerignore
databases
sync_lib/pkg
*.yml

File diff suppressed because it is too large Load diff

View file

@ -1,39 +1,22 @@
[package]
name = "sync_server"
rust-version = "1.89.0"
[workspace]
resolver = "2"
members = [
"reconcile",
"sync_server",
"sync_lib"
]
[workspace.package]
rust-version = "1.83"
authors = ["Andras Schmelczer <andras@schmelczer.dev>"]
edition = "2024"
license = "MIT"
repository = "https://github.com/schmelczer/vault-link"
version = "0.14.0"
version = "0.3.3"
[dependencies]
[workspace.dependencies]
serde = { version = "1.0.219", default-features = false, features = ["derive"] }
thiserror = { version = "2.0.12", default-features = false }
tokio = { version = "1.48.0", features = ["full"]}
uuid = { version = "1.16.0", features = ["v4", "serde"] }
log = { version = "0.4.28" }
anyhow = { version = "1.0.100", features = ["backtrace"] }
axum = { version = "0.7.4", features = ["ws", "macros", "tracing", "multipart"]}
axum-extra = { version = "0.9.6", features = ["typed-header"] }
axum_typed_multipart = "0.11.0"
tower-http = { version = "0.6.1", features = ["cors", "trace", "limit", "timeout"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["fmt", "env-filter"]}
humantime-serde = "1.1.1"
sqlx = { version = "0.8.6", features = ["sqlite", "runtime-tokio", "uuid", "chrono"] }
chrono = { version = "0.4.41", features = ["serde"] }
rand = "0.9.0"
sanitize-filename = "0.6.0"
regex = "1.12.2"
clap = { version = "4.5.38", features = ["derive"] }
futures = "0.3.31"
serde_yaml = "0.9.34"
serde_json = "1.0.140"
bimap = "0.6.3"
ts-rs = { version = "10.1", features = ["uuid-impl", "chrono-impl"] }
base64 = "0.22.1"
reconcile-text = { version = "0.8.0", features = ["serde"] }
thiserror = { version = "1.0.66", default-features = false }
[profile.release]
codegen-units = 1
@ -41,12 +24,12 @@ lto = true
opt-level = 3
strip="debuginfo" # Keep some info for better panics
[lints.rust]
[workspace.lints.rust]
unsafe_code = "forbid"
rust_2018_idioms = { level = "warn", priority = -1 }
missing_debug_implementations = "warn"
[lints.clippy]
[workspace.lints.clippy]
await_holding_lock = "warn"
dbg_macro = "warn"
empty_enum = "warn"
@ -60,6 +43,7 @@ inefficient_to_string = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
match_on_vec_items = "warn"
match_wildcard_for_single_variants = "warn"
mem_forget = "warn"
needless_borrow = "warn"
@ -74,18 +58,18 @@ unnested_or_patterns = "warn"
unused_self = "warn"
verbose_file_reads = "warn"
large_stack_arrays = { level = "allow", priority = 1 } # https://github.com/rust-lang/rust-clippy/issues/13774
# Silly lints
implicit_return = { level = "allow", priority = 1 }
question_mark_used = { level = "allow", priority = 1 }
cast_possible_truncation = { level = "allow", priority = 1 }
doc_link_with_quotes = { level = "allow", priority = 1 }
cast_sign_loss = { level = "allow", priority = 1 }
cast_possible_wrap = { level = "allow", priority = 1 }
struct_field_names = { level = "allow", priority = 1 }
single_char_lifetime_names = { level = "allow", priority = 1 }
single_call_fn = { level = "allow", priority = 1 }
absolute_paths = { level = "allow", priority = 1 }
arithmetic_side_effects = { level = "allow", priority = 1 }
similar_names = { level = "allow", priority = 1 }
self_named_module_files = { level = "allow", priority = 1 }
single_char_lifetime_names = { level = "allow", priority = 1 }
missing_docs_in_private_items = { level = "allow", priority = 1 }
question_mark_used = { level = "allow", priority = 1 }
implicit_return = { level = "allow", priority = 1 }
pedantic = { level = "warn", priority = 0 }
[package.metadata.cargo-machete]
ignored = ["humantime-serde"] # only used in serde macro

33
backend/Dockerfile Normal file
View file

@ -0,0 +1,33 @@
FROM rust:1.85 AS builder
WORKDIR /usr/src/backend
RUN apt update && apt install -y musl-tools
RUN cargo install sqlx-cli
COPY . .
RUN sqlx database create --database-url sqlite://db.sqlite3
RUN sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3
RUN cargo build --package sync_server --release --target x86_64-unknown-linux-musl
# Runtime image
FROM alpine:3.21.3
LABEL org.opencontainers.image.authors="andras@schmelczer.dev"
RUN apk add --no-cache curl
COPY --from=builder /usr/src/backend/target/x86_64-unknown-linux-musl/release/sync_server /app/sync_server
VOLUME /data
EXPOSE 3000/tcp
WORKDIR /data
HEALTHCHECK \
--interval=30s \
--timeout=5s \
CMD curl -f http://localhost:3000/ping || exit 1
ENTRYPOINT ["/app/sync_server"]

View file

@ -1,33 +1,23 @@
database:
databases_directory_path: databases
max_connections_per_vault: 12
cursor_timeout: 1m
server:
host: 0.0.0.0
port: 3000
max_body_size_mb: 512
max_clients_per_vault: 256
response_timeout: 30m
mergeable_file_extensions:
- md
- txt
users:
user_configs:
user_tokens:
- name: admin
token: test-token-change-me
vault_access:
type: allow_access_to_all
- name: other-admin
token: test-token-change-me2
vault_access:
vault_access:
type: allow_access_to_all
- name: test
token: other-test-token
vault_access:
type: allow_list
allowed:
- default
logging:
log_directory: logs
log_rotation: 7days
log_level: info
allowed:
- default

View file

@ -0,0 +1,23 @@
[package]
name = "reconcile"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[dependencies]
serde = { version = "1.0.219", optional = true, features = ["derive"] }
[features]
serde = [ "dep:serde" ]
[dev-dependencies]
insta = "1.42.2"
pretty_assertions = "1.4.1"
serde = { version = "1.0.219", features = ["derive"] }
serde_yaml ="0.9.34"
test-case = "3.3.1"
[lints]
workspace = true

View file

@ -0,0 +1,2 @@
pub mod myers;
pub mod raw_operation;

View file

@ -0,0 +1,284 @@
//! Taken from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs>
//! Myers' diff algorithm.
//!
//! * time: `O((N+M)D)`
//! * space `O(N+M)`
//!
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
//! describing it.
//!
//! The implementation of this algorithm is based on the implementation by
//! Brandon Williams.
//!
//! # Heuristics
//!
//! At present this implementation of Myers' does not implement any more
//! advanced heuristics that would solve some pathological cases. For instance
//! passing two large and completely distinct sequences to the algorithm will
//! make it spin without making reasonable progress.
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
use std::{
ops::{Index, IndexMut, Range},
vec,
};
use super::raw_operation::RawOperation;
use crate::{
tokenizer::token::Token,
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
};
/// Myers' diff algorithm with deadline.
///
/// Diff `old`, between indices `old_range` and `new` between indices
/// `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
where
T: PartialEq + Clone + std::fmt::Debug,
{
let max_d = (old.len() + new.len()).div_ceil(2) + 1;
let mut vb = V::new(max_d);
let mut vf = V::new(max_d);
let mut result: Vec<RawOperation<T>> = vec![];
conquer(
old,
0..old.len(),
new,
0..new.len(),
&mut vf,
&mut vb,
&mut result,
);
result
}
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
// and then a possibly empty sequence of diagonal edges called a snake.
/// `V` contains the endpoints of the furthest reaching `D-paths`. For each
/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x`
/// because `y` can be computed from `x - k`. In other words, `V` is an array of
/// integers where `V[k]` contains the row index of the endpoint of the furthest
/// reaching path in diagonal `k`.
///
/// We can't use a traditional Vec to represent `V` since we use `k` as an index
/// and it can take on negative values. So instead `V` is represented as a
/// light-weight wrapper around a Vec plus an `offset` which is the maximum
/// value `k` can take on in order to map negative `k`'s back to a value >= 0.
#[derive(Debug)]
struct V {
offset: isize,
v: Vec<usize>, // Look into initializing this to -1 and storing isize
}
impl V {
fn new(max_d: usize) -> Self {
Self {
offset: max_d as isize,
v: vec![0; 2 * max_d],
}
}
fn len(&self) -> usize { self.v.len() }
}
impl Index<isize> for V {
type Output = usize;
fn index(&self, index: isize) -> &Self::Output { &self.v[(index + self.offset) as usize] }
}
impl IndexMut<isize> for V {
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
&mut self.v[(index + self.offset) as usize]
}
}
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
(range.start..at, at..range.end)
}
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
/// a snake has a start end end point (and it is possible for a snake to have
/// a length of zero, meaning the start and end points are the same) however
/// we do not need the end point which is why it's not implemented here.
///
/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
/// some of which may be empty. The divide step requires finding the ceil(D/2) +
/// 1 or middle snake of an optimal D-path. The idea for doing so is to
/// simultaneously run the basic algorithm in both the forward and reverse
/// directions until furthest reaching forward and reverse paths starting at
/// opposing corners 'overlap'.
fn find_middle_snake<T>(
old: &[Token<T>],
old_range: Range<usize>,
new: &[Token<T>],
new_range: Range<usize>,
vf: &mut V,
vb: &mut V,
) -> Option<(usize, usize)>
where
T: PartialEq + Clone + std::fmt::Debug,
{
let n = old_range.len();
let m = new_range.len();
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
// `delta` is odd or even.
let delta = n as isize - m as isize;
let odd = delta & 1 == 1;
// The initial point at (0, -1)
vf[1] = 0;
// The initial point at (N, M+1)
vb[1] = 0;
let d_max = (n + m).div_ceil(2) + 1;
assert!(vf.len() >= d_max);
assert!(vb.len() >= d_max);
for d in 0..d_max as isize {
// Forward path
for k in (-d..=d).rev().step_by(2) {
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
vf[k + 1]
} else {
vf[k - 1] + 1
};
let y = (x as isize - k) as usize;
// The coordinate of the start of a snake
let (x0, y0) = (x, y);
// While these sequences are identical, keep moving through the
// graph with no cost
if x < old_range.len() && y < new_range.len() {
let advance = common_prefix_len(
old,
old_range.start + x..old_range.end,
new,
new_range.start + y..new_range.end,
);
x += advance;
}
// This is the new best x value
vf[k] = x;
// Only check for connections from the forward search when N - M is
// odd and when there is a reciprocal k line coming from the other
// direction.
if odd && (k - delta).abs() <= (d - 1) {
// TODO optimize this so we don't have to compare against n
if vf[k] + vb[-(k - delta)] >= n {
// Return the snake
return Some((x0 + old_range.start, y0 + new_range.start));
}
}
}
// Backward path
for k in (-d..=d).rev().step_by(2) {
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
vb[k + 1]
} else {
vb[k - 1] + 1
};
let mut y = (x as isize - k) as usize;
// The coordinate of the start of a snake
if x < n && y < m {
let advance = common_suffix_len(
old,
old_range.start..old_range.start + n - x,
new,
new_range.start..new_range.start + m - y,
);
x += advance;
y += advance;
}
// This is the new best x value
vb[k] = x;
if !odd && (k - delta).abs() <= d {
// TODO optimize this so we don't have to compare against n
if vb[k] + vf[-(k - delta)] >= n {
// Return the snake
return Some((n - x + old_range.start, m - y + new_range.start));
}
}
}
// TODO: Maybe there's an opportunity to optimize and bail early?
}
None
}
fn conquer<T>(
old: &[Token<T>],
mut old_range: Range<usize>,
new: &[Token<T>],
mut new_range: Range<usize>,
vf: &mut V,
vb: &mut V,
result: &mut Vec<RawOperation<T>>,
) where
T: PartialEq + Clone + std::fmt::Debug,
{
// Check for common prefix
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
if common_prefix_len > 0 {
result.push(RawOperation::Equal(
old[old_range.start..old_range.start + common_prefix_len].to_vec(),
));
}
old_range.start += common_prefix_len;
new_range.start += common_prefix_len;
// Check for common suffix
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
let common_suffix = (
old_range.end - common_suffix_len,
new_range.end - common_suffix_len,
);
old_range.end -= common_suffix_len;
new_range.end -= common_suffix_len;
if old_range.is_empty() && new_range.is_empty() {
// Do nothing
} else if new_range.is_empty() {
result.push(RawOperation::Delete(
old[old_range.start..old_range.start + old_range.len()].to_vec(),
));
} else if old_range.is_empty() {
result.push(RawOperation::Insert(
new[new_range.start..new_range.start + new_range.len()].to_vec(),
));
} else if let Some((x_start, y_start)) =
find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb)
{
let (old_a, old_b) = split_at(old_range, x_start);
let (new_a, new_b) = split_at(new_range, y_start);
conquer(old, old_a, new, new_a, vf, vb, result);
conquer(old, old_b, new, new_b, vf, vb, result);
} else {
result.push(RawOperation::Delete(
old[old_range.start..old_range.end].to_vec(),
));
result.push(RawOperation::Insert(
new[new_range.start..new_range.end].to_vec(),
));
}
if common_suffix_len > 0 {
result.push(RawOperation::Equal(
old[common_suffix.0..common_suffix.0 + common_suffix_len].to_vec(),
));
}
}

View file

@ -0,0 +1,48 @@
use crate::tokenizer::token::Token;
#[derive(Debug, Clone, PartialEq)]
pub enum RawOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
Insert(Vec<Token<T>>),
Delete(Vec<Token<T>>),
Equal(Vec<Token<T>>),
}
impl<T> RawOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn tokens(&self) -> &Vec<Token<T>> {
match self {
RawOperation::Insert(tokens)
| RawOperation::Delete(tokens)
| RawOperation::Equal(tokens) => tokens,
}
}
pub fn original_text_length(&self) -> usize {
self.tokens().iter().map(Token::get_original_length).sum()
}
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
/// Extends the operation with another operation if returning the new
/// operation. Only operations of the same type can be used to extend.
/// If the operations are of different types, returns None.
pub fn extend(self, other: RawOperation<T>) -> Option<RawOperation<T>> {
match (self, other) {
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => Some(
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect()),
),
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => Some(
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect()),
),
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => Some(
RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect()),
),
_ => None,
}
}
}

View file

@ -0,0 +1,10 @@
mod diffs;
mod operation_transformation;
mod tokenizer;
mod utils;
pub use operation_transformation::{
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
reconcile_with_tokenizer,
};
pub use tokenizer::{Tokenizer, token::Token};

View file

@ -0,0 +1,411 @@
mod cursor;
mod edited_text;
mod merge_context;
mod operation;
pub use cursor::{CursorPosition, TextWithCursors};
pub use edited_text::EditedText;
pub use operation::Operation;
use crate::Tokenizer;
#[must_use]
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
reconcile_with_cursors(original, left.into(), right.into())
.text
.to_string()
}
#[must_use]
pub fn reconcile_with_cursors<'a>(
original: &'a str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
) -> TextWithCursors<'static> {
let left_operations = EditedText::from_strings(original, left);
let right_operations = EditedText::from_strings(original, right);
let merged_operations = left_operations.merge(right_operations);
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
}
#[must_use]
pub fn reconcile_with_tokenizer<'a, F, T>(
original: &str,
left: TextWithCursors<'a>,
right: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> TextWithCursors<'static>
where
T: PartialEq + Clone + std::fmt::Debug,
{
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
let merged_operations = left_operations.merge(right_operations);
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
}
#[cfg(test)]
mod test {
use std::{fs, ops::Range, path::Path};
use pretty_assertions::assert_eq;
use test_case::test_matrix;
use super::*;
use crate::CursorPosition;
#[test]
fn test_merges() {
// Both replaced one token but different
test_merge_both_ways(
"original_1 original_2 original_3",
"original_1 edit_1 original_3",
"original_1 original_2 edit_2",
"original_1 edit_1 edit_2",
);
// Both replaced the same one token
test_merge_both_ways(
"original_1 original_2 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 original_3",
"original_1 edit_1 original_3",
);
// One deleted a large range, the other deleted subranges and inserted as
// well
test_merge_both_ways(
"original_1 original_2 original_3 original_4 original_5",
"original_1 original_5",
"original_1 edit_1 original_3 edit_2 original_5",
"original_1 edit_1 edit_2 original_5",
);
// One deleted a large range, the other inserted and deleted a partially
// overlapping range
test_merge_both_ways(
"original_1 original_2 original_3 original_4 original_5",
"original_1 original_5",
"original_1 edit_1 original_3 edit_2",
"original_1 edit_1 edit_2",
);
// Merge a replace and an append
test_merge_both_ways("a b ", "c d ", "a b c d ", "c d c d ");
test_merge_both_ways("a b c d e", "a e", "a c e", "a e");
test_merge_both_ways("a 0 1 2 b", "a b", "a E 1 F b", "a E F b");
test_merge_both_ways(
"a this one delete b",
"a b",
"a my one change b",
"a my change b",
);
test_merge_both_ways(
"this stays, this is one big delete, don't touch this",
"this stays, don't touch this",
"this stays, my one change, don't touch this",
"this stays, my change, don't touch this",
);
test_merge_both_ways("1 2 3 4 5 6", "1 6", "1 2 4 ", "1 ");
test_merge_both_ways(
"hello world",
"hi, world",
"hello my friend!",
"hi, my friend!",
);
test_merge_both_ways(
"both delete the same word",
"both the same word",
"both the same word",
"both the same word",
);
test_merge_both_ways(" ", "its utf-8!", " ", "its utf-8!");
test_merge_both_ways(
"both delete the same word but one a bit more",
"both the same word",
"both same word",
"both same word",
);
test_merge_both_ways(
"long text with one big delete and many small",
"long small",
"long with big and small",
"long small",
);
}
#[test]
fn test_reconcile_idempotent_inserts() {
// Both inserted the same prefix; this should get deduped
test_merge_both_ways(
"hi ",
"hi there ",
"hi there my friend ",
"hi there my friend ",
);
// The prefix of the 2nd appears on the 1st so it shouldn't get duplicated
test_merge_both_ways(
"hi ",
"hi there you ",
"hi there my friend ",
"hi there my friend you ",
);
test_merge_both_ways("a", "a b c", "a b c d", "a b c d");
test_merge_both_ways(
" |7ca2b36d-6ee7-49eb-8eb1-d77e4cc1a001| ",
" |7ca2b36d-6ee7-49eb-8eb1-d77e4cc1a001| |cd9195cc-103a-4f13-90c8-4fba0ba421ee| |d39156cc-cfd6-42a8-b70a-75020896069d| |fbad794c-9c47-41f2-a343-490284ecb5a0| |dup| ",
" |7ca2b36d-6ee7-49eb-8eb1-d77e4cc1a001| |cd9195cc-103a-4f13-90c8-4fba0ba421ee| |dup| ",
" |7ca2b36d-6ee7-49eb-8eb1-d77e4cc1a001| |cd9195cc-103a-4f13-90c8-4fba0ba421ee| |d39156cc-cfd6-42a8-b70a-75020896069d| |fbad794c-9c47-41f2-a343-490284ecb5a0| |dup| |dup| ");
}
#[test]
fn test_cursor_position_no_updates() {
let original = "hello world";
let left = TextWithCursors::new(
"hello world",
vec![CursorPosition {
id: 0,
char_index: 0,
}],
);
let right = TextWithCursors::new(
"hello world",
vec![CursorPosition {
id: 1,
char_index: 5,
}],
);
let merged = reconcile_with_cursors(original, left, right);
assert_eq!(
merged,
TextWithCursors::new(
"hello world",
vec![
CursorPosition {
id: 0,
char_index: 0
},
CursorPosition {
id: 1,
char_index: 5
}
]
)
);
}
#[test]
fn test_cursor_position_updates_with_inserts() {
let original = "hi";
let left = TextWithCursors::new(
"hi there",
vec![CursorPosition {
id: 0,
char_index: 7,
}],
);
let right = TextWithCursors::new(
"hi world!",
vec![
CursorPosition {
id: 1,
char_index: 9,
},
CursorPosition {
id: 2,
char_index: 1,
},
],
);
let merged = reconcile_with_cursors(original, left, right);
assert_eq!(
merged,
TextWithCursors::new(
"hi there world!",
vec![
CursorPosition {
id: 2,
char_index: 1,
},
CursorPosition {
id: 0,
char_index: 7
},
CursorPosition {
id: 1,
char_index: 15
},
]
)
);
}
#[test]
fn test_cursor_position_updates_with_deleted() {
let original = "a b c d";
let left = TextWithCursors::new(
"a b d",
vec![CursorPosition {
id: 0,
char_index: 1, // after a
}],
);
let right = TextWithCursors::new(
"c d",
vec![CursorPosition {
id: 1,
char_index: 1, // after c
}],
);
let merged = reconcile_with_cursors(original, left, right);
assert_eq!(
merged,
TextWithCursors::new(
" d",
vec![
CursorPosition {
id: 0,
char_index: 0
},
CursorPosition {
id: 1,
char_index: 1
}
]
)
);
}
#[test]
fn test_cursor_complex() {
let original = "this is some complex text to test cursor positions";
let left = TextWithCursors::new(
"this is really complex text for testing cursor positions",
vec![
CursorPosition {
id: 0,
char_index: 8,
}, // after "this is "
CursorPosition {
id: 1,
char_index: 22,
}, // after "this is really complex text"
],
);
let right = TextWithCursors::new(
"that was some complex sample to test cursor movements",
vec![
CursorPosition {
id: 2,
char_index: 5,
}, // after "that "
CursorPosition {
id: 3,
char_index: 29,
}, // after "some complex sample "
],
);
let merged = reconcile_with_cursors(original, left, right);
assert_eq!(
merged,
TextWithCursors::new(
"that was really complex sample for testing cursor movements",
vec![
CursorPosition {
id: 2,
char_index: 5
}, // unchanged
CursorPosition {
id: 0,
char_index: 9
}, // before "really"
CursorPosition {
id: 1,
char_index: 23
}, // inside of "s|ample" because "text" got replaced by "sample"
CursorPosition {
id: 3,
char_index: 31
}, // before "for"
]
)
);
}
#[test_matrix( [
"pride_and_prejudice.txt",
"romeo_and_juliet.txt",
"room_with_a_view.txt",
"kun_lu.txt",
], [
"pride_and_prejudice.txt",
"romeo_and_juliet.txt",
"room_with_a_view.txt",
"kun_lu.txt"
], [
"pride_and_prejudice.txt",
"romeo_and_juliet.txt",
"room_with_a_view.txt",
"kun_lu.txt"
], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])]
fn test_merge_files_without_panic(
file_name_1: &str,
file_name_2: &str,
file_name_3: &str,
range_1: Range<usize>,
range_2: Range<usize>,
range_3: Range<usize>,
) {
let files = [file_name_1, file_name_2, file_name_3];
let permutations = [range_1, range_2, range_3];
let root = Path::new("tests/resources/");
let contents = files
.iter()
.zip(permutations.iter())
.map(|(file, range)| {
let path = root.join(file);
fs::read_to_string(&path)
.unwrap()
.chars()
.skip(range.start)
.take(range.end)
.collect::<String>()
})
.collect::<Vec<_>>();
let _ = reconcile(&contents[0], &contents[1], &contents[2]);
}
fn test_merge_both_ways(original: &str, edit_1: &str, edit_2: &str, expected: &str) {
assert_eq!(reconcile(original, edit_1, edit_2), expected);
assert_eq!(reconcile(original, edit_2, edit_1), expected);
}
}

View file

@ -0,0 +1,68 @@
use std::borrow::Cow;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::operation_transformation::Operation;
// CursorPosition represents the position of an identifiable cursor in a text
// document based on its (UTF-8) character index.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
pub id: usize,
pub char_index: usize,
}
impl CursorPosition {
#[must_use]
pub fn apply_merge_context<T>(&self, context: &MergeContext<T>) -> Self
where
T: PartialEq + Clone + std::fmt::Debug,
{
let char_index = match context.last_operation() {
Some(Operation::Delete { index, .. }) => (*index) as i64,
_ => self.char_index as i64 + context.shift,
};
CursorPosition {
id: self.id,
char_index: char_index.max(0) as usize,
}
}
}
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TextWithCursors<'a> {
pub text: Cow<'a, str>,
pub cursors: Vec<CursorPosition>,
}
impl<'a> TextWithCursors<'a> {
#[must_use]
pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
Self {
text: text.into(),
cursors,
}
}
#[must_use]
pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
Self {
text: text.into(),
cursors,
}
}
}
impl<'a> From<&'a str> for TextWithCursors<'a> {
fn from(text: &'a str) -> Self {
Self {
text: text.into(),
cursors: Vec::new(),
}
}
}

View file

@ -0,0 +1,377 @@
use core::iter;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::{CursorPosition, Operation, TextWithCursors};
use crate::{
diffs::{myers::diff, raw_operation::RawOperation},
operation_transformation::merge_context::MergeContext,
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
utils::{
merge_iters::MergeSorted as _, ordered_operation::OrderedOperation, side::Side,
string_builder::StringBuilder,
},
};
/// A sequence of operations that can be applied to a text document.
/// `EditedText` supports merging two sequences of operations using the
/// principle of Operational Transformation.
///
/// It's mainly created through the `from_strings` method, then merged with
/// another `EditedText` derived from the same original text and then applied to
/// the original text to get the reconciled text of concurrent edits.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
text: &'a str,
operations: Vec<OrderedOperation<T>>,
pub(crate) cursors: Vec<CursorPosition>,
}
impl<'a> EditedText<'a, String> {
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The default
/// word tokenizer is used to tokenize the text which splits the text on
/// whitespaces.
#[must_use]
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
}
}
impl<'a, T> EditedText<'a, T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Create an `EditedText` from the given original (old) and updated (new)
/// strings. The returned `EditedText` represents the changes from the
/// original to the updated text. When the return value is applied to
/// the original text, it will result in the updated text. The tokenizer
/// function is used to tokenize the text.
pub fn from_strings_with_tokenizer(
original: &'a str,
updated: TextWithCursors<'a>,
tokenizer: &Tokenizer<T>,
) -> Self {
let original_tokens = (tokenizer)(original);
let updated_tokens = (tokenizer)(&updated.text);
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
Self::new(
original,
Self::cook_operations(Self::elongate_operations(diff)).collect(),
updated.cursors,
)
}
// Turn raw operations into ordered operations while keeping track of old & new
// indexes.
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
{
let mut new_index = 0; // this is the start index of the operation on the new text
let mut order = 0; // this is the start index of the operation on the original text
raw_operations.into_iter().flat_map(move |raw_operation| {
let length = raw_operation.original_text_length();
let operation = match raw_operation {
RawOperation::Equal(..) => {
new_index += length;
order += length;
None
}
RawOperation::Insert(tokens) => {
let op = Operation::create_insert(new_index, tokens)
.map(|operation| OrderedOperation { order, operation });
new_index += length;
op
}
RawOperation::Delete(..) => {
let op = if cfg!(debug_assertions) {
Operation::create_delete_with_text(
new_index,
raw_operation.get_original_text(),
)
} else {
Operation::create_delete(new_index, length)
}
.map(|operation| OrderedOperation { order, operation });
order += length;
op
}
};
operation.into_iter()
})
}
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
where
I: IntoIterator<Item = RawOperation<T>>,
{
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
let mut result: Vec<RawOperation<T>> = raw_operations
.into_iter()
.flat_map(|next| match next {
RawOperation::Insert(..) => {
if let Some(prev) = maybe_previous_insert.take() {
maybe_previous_insert = prev.extend(next);
} else {
maybe_previous_insert = Some(next);
}
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
RawOperation::Delete(..) => {
if let Some(prev) = maybe_previous_delete.take() {
maybe_previous_delete = prev.extend(next);
} else {
maybe_previous_delete = Some(next);
}
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
}
RawOperation::Equal(..) => Box::new(
maybe_previous_insert
.take()
.into_iter()
.chain(maybe_previous_delete.take())
.chain(iter::once(next)),
)
as Box<dyn Iterator<Item = RawOperation<T>>>,
})
.collect();
if let Some(prev) = maybe_previous_insert {
result.push(prev);
}
if let Some(prev) = maybe_previous_delete {
result.push(prev);
}
result
}
/// Create a new `EditedText` with the given operations.
/// The operations must be in the order in which they are meant to be
/// applied. The operations must not overlap.
fn new(
text: &'a str,
operations: Vec<OrderedOperation<T>>,
mut cursors: Vec<CursorPosition>,
) -> Self {
operations
.iter()
.zip(operations.iter().skip(1))
.for_each(|(previous, next)| {
debug_assert!(
previous.operation.start_index() <= next.operation.start_index(),
"{} must not come before {} yet it does",
previous.operation,
next.operation
);
});
cursors.sort_by_key(|cursor| cursor.char_index);
Self {
text,
operations,
cursors,
}
}
#[must_use]
pub fn merge(self, other: Self) -> Self {
debug_assert_eq!(
self.text, other.text,
"`EditedText`-s must be derived from the same text to be mergable"
);
let mut left_merge_context = MergeContext::default();
let mut right_merge_context = MergeContext::default();
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
let mut left_cursors = self.cursors.iter().peekable();
let mut right_cursors = other.cursors.iter().peekable();
let merged_operations = self
.operations
.into_iter()
// The current text is always the left; the other operation is the right side.
.map(|op| (op, Side::Left))
.merge_sorted_by_key(
other.operations.into_iter().map(|op| (op, Side::Right)),
|(operation, _)| {
(
operation.order,
// Operations on the left and right must come in the same order so that
// inserts can be merged with other inserts and deletes with deletes.
usize::from(matches!(operation.operation, Operation::Delete { .. })),
// Make sure that the ordering is deterministic regardless which text
// is left or right.
match &operation.operation {
Operation::Insert { text, .. } => text
.iter()
.map(super::super::tokenizer::token::Token::original)
.collect::<String>(),
Operation::Delete {
deleted_character_count,
..
} => deleted_character_count.to_string(),
},
)
},
)
.flat_map(|(OrderedOperation { order, operation }, side)| {
match side {
Side::Left => {
while let Some(cursor) = left_cursors
.next_if(|cursor| cursor.char_index <= operation.start_index())
{
right_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
while let Some(cursor) = right_cursors.next_if(|cursor| {
cursor.char_index as i64
<= operation.start_index() as i64 + right_merge_context.shift
- left_merge_context.shift
}) {
left_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
operation.merge_operations_with_context(
&mut right_merge_context,
&mut left_merge_context,
)
}
Side::Right => {
while let Some(cursor) = right_cursors
.next_if(|cursor| cursor.char_index <= operation.start_index())
{
left_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
while let Some(cursor) = left_cursors.next_if(|cursor| {
cursor.char_index as i64
<= operation.start_index() as i64 + left_merge_context.shift
- right_merge_context.shift
}) {
right_merge_context.consume_last_operation_if_it_is_too_behind(
cursor.char_index as i64,
);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
operation.merge_operations_with_context(
&mut left_merge_context,
&mut right_merge_context,
)
}
}
.map(|operation| OrderedOperation { order, operation })
.into_iter()
})
.collect();
for cursor in left_cursors {
right_merge_context
.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
merged_cursors.push(cursor.apply_merge_context(&right_merge_context));
}
for cursor in right_cursors {
left_merge_context.consume_last_operation_if_it_is_too_behind(cursor.char_index as i64);
merged_cursors.push(cursor.apply_merge_context(&left_merge_context));
}
Self::new(self.text, merged_operations, merged_cursors)
}
/// Apply the operations to the text and return the resulting text.
#[must_use]
pub fn apply(&self) -> String {
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
for OrderedOperation { operation, .. } in &self.operations {
builder = operation.apply(builder);
}
builder.build()
}
}
#[cfg(test)]
mod tests {
use std::env;
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_calculate_operations() {
let left = "hello world! How are you? Adam";
let right = "Hello, my friend! How are you doing? Albert";
let operations = EditedText::from_strings(left, right.into());
insta::assert_debug_snapshot!(operations);
let new_right = operations.apply();
assert_eq!(new_right.to_string(), right);
}
#[test]
fn test_calculate_operations_with_no_diff() {
let text = "hello world!";
let operations = EditedText::from_strings(text, text.into());
assert_eq!(operations.operations.len(), 0);
let new_right = operations.apply();
assert_eq!(new_right.to_string(), text);
}
#[test]
fn test_calculate_operations_with_insert() {
let original = "hello world! ...";
let left = "Hello world! I'm Andras.";
let right = "Hello world! How are you?";
let expected = "Hello world! How are you? I'm Andras.";
let operations_1 = EditedText::from_strings(original, left.into());
let operations_2 = EditedText::from_strings(original, right.into());
let operations = operations_1.merge(operations_2);
assert_eq!(operations.apply(), expected);
}
}

View file

@ -0,0 +1,74 @@
use core::fmt::Debug;
use crate::operation_transformation::Operation;
#[derive(Clone, Debug)]
pub struct MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
last_operation: Option<Operation<T>>,
pub shift: i64,
}
impl<T> Default for MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn default() -> Self {
MergeContext {
last_operation: None,
shift: 0,
}
}
}
impl<T> MergeContext<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
self.last_operation = operation;
}
/// Replace the last delete operation (if there was one) with a new one
/// while applying it to the `shift` in case the last operation
/// was a delete.
pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
if let Some(Operation::Delete {
deleted_character_count,
..
}) = self.last_operation.take()
{
self.shift -= deleted_character_count as i64;
}
self.last_operation = operation;
}
/// Remove the last operation (if there was one) in case it is behind the
/// threshold operation. This updates the `shift` in case the last operation
/// was a delete.
pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
if let Some(last_operation) = self.last_operation.as_ref() {
if let Operation::Delete {
deleted_character_count,
..
} = last_operation
{
if threshold_index + self.shift > last_operation.end_index() as i64 {
self.shift -= *deleted_character_count as i64;
self.last_operation = None;
}
} else if let Operation::Insert { .. } = last_operation {
if threshold_index + self.shift - last_operation.len() as i64
> last_operation.end_index() as i64
{
self.last_operation = None;
}
}
}
}
}

View file

@ -0,0 +1,377 @@
use core::fmt::{Debug, Display};
use std::ops::Range;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::merge_context::MergeContext;
use crate::{
Token,
utils::{
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
string_builder::StringBuilder,
},
};
/// Represents a change that can be applied to a text document.
/// Operation is tied to a `ropey::Rope` and is mainly expected to be
/// created by `EditedText`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq)]
pub enum Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
Insert {
index: usize,
text: Vec<Token<T>>,
},
Delete {
index: usize,
deleted_character_count: usize,
#[cfg(debug_assertions)]
deleted_text: Option<String>,
},
}
impl<T> Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
/// Creates an insert operation with the given index and text.
/// If the text is empty (meaning that the operation would be a no-op),
/// returns None.
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
if text.is_empty() {
return None;
}
Some(Operation::Insert { index, text })
}
/// Creates a delete operation with the given index and number of
/// to-be-deleted characters. If the operation would delete 0 (meaning
/// that the operation would be a no-op), returns None.
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
if deleted_character_count == 0 {
return None;
}
Some(Operation::Delete {
index,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text: None,
})
}
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
if text.is_empty() {
return None;
}
Some(Operation::Delete {
index,
deleted_character_count: text.chars().count(),
#[cfg(debug_assertions)]
deleted_text: Some(text),
})
}
/// Applies the operation to the given `StringBuilder`, returning the
/// modified `StringBuilder`.
///
/// When compiled in debug mode, panics if a delete operation is attempted
/// on a range of text that does not match the text to be deleted.
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
match self {
Operation::Insert { text, .. } => builder.insert(
self.start_index(),
&text.iter().map(Token::original).collect::<String>(),
),
Operation::Delete {
#[cfg(debug_assertions)]
deleted_text,
..
} => {
#[cfg(debug_assertions)]
debug_assert!(
deleted_text
.as_ref()
.is_none_or(|text| builder.get_slice(self.range()) == *text),
"Text to delete does not match the text in the range"
);
builder.delete(self.range());
}
}
builder
}
/// Returns the index of the first character that the operation affects.
pub fn start_index(&self) -> usize {
match self {
Operation::Insert { index, .. } | Operation::Delete { index, .. } => *index,
}
}
/// Returns the index of the last character that the operation affects.
pub fn end_index(&self) -> usize {
debug_assert!(
self.len() > 0,
" len() must be greater than 0 because operations must be non-empty"
);
self.start_index() + self.len() - 1
}
/// Returns the range of indices of characters that the operation affects.
#[allow(clippy::range_plus_one)]
pub fn range(&self) -> Range<usize> { self.start_index()..self.end_index() + 1 }
/// Returns the number of affected characters. It is always greater than 0
/// because empty operations cannot be created.
pub fn len(&self) -> usize {
match self {
Operation::Insert { text, .. } => text.iter().map(Token::get_original_length).sum(),
Operation::Delete {
deleted_character_count,
..
} => *deleted_character_count,
}
}
/// Creates a new operation with the same type and text but with the given
/// index.
pub fn with_index(self, index: usize) -> Self {
match self {
Operation::Insert { text, .. } => Operation::Insert { index, text },
Operation::Delete {
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
..
} => Operation::Delete {
index,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
},
}
}
/// Creates a new operation with the same type and text but with the index
/// shifted by the given offset. The offset can be negative but the
/// resulting index must be non-negative.
///
/// # Panics
///
/// In debug mode, panics if the resulting index is negative.
pub fn with_shifted_index(self, offset: i64) -> Self {
let index = self.start_index() as i64 + offset;
debug_assert!(index >= 0, "Shifted index must be non-negative");
self.with_index(index as usize)
}
/// Merges the operation with the given context, producing a new operation
/// and updating the context. This implements a comples FSM that handles
/// the merging of operations in a way that is consistent with the text.
/// The contexts are updated in-place.
pub fn merge_operations_with_context(
self,
affecting_context: &mut MergeContext<T>,
produced_context: &mut MergeContext<T>,
) -> Option<Operation<T>> {
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
let operation = self.with_shifted_index(affecting_context.shift);
match (operation, affecting_context.last_operation()) {
(operation @ Operation::Insert { .. }, None) => {
produced_context.shift += operation.len() as i64;
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
(
Operation::Insert { text, index },
Some(Operation::Insert {
text: previous_inserted_text,
..
}),
) => {
// In case the current insert's prefix appears in the previously inserted text,
// we can trim the current insert to only include the non-overlapping part.
// This way, we don't end up duplicating text.
let offset_in_tokens =
find_longest_prefix_contained_within(previous_inserted_text, &text);
let offset_in_length = text
.iter()
.take(offset_in_tokens)
.map(Token::get_original_length)
.sum::<usize>();
let trimmed_operation =
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
affecting_context.shift -= offset_in_length as i64;
produced_context.shift += trimmed_operation
.as_ref()
.map(Operation::len)
.unwrap_or_default() as i64;
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
trimmed_operation
}
(operation @ Operation::Delete { .. }, None | Some(Operation::Insert { .. })) => {
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
Some(operation)
}
(
operation @ Operation::Insert { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
produced_context.shift += operation.len() as i64;
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let moved_operation = operation.with_index(last_delete.start_index());
affecting_context.replace_last_operation(Operation::create_delete(
moved_operation.end_index() + 1,
(last_delete.len() as i64 - difference) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
Some(moved_operation)
}
(
operation @ Operation::Delete { .. },
Some(last_delete @ Operation::Delete { .. }),
) => {
debug_assert!(
last_delete.range().contains(&operation.start_index()),
"There is a last delete ({last_delete}) but the operation ({operation}) is \
not contained in it"
);
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
let updated_delete = Operation::create_delete(
last_delete.start_index(),
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
);
affecting_context.replace_last_operation(Operation::create_delete(
last_delete.start_index(),
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
));
affecting_context.shift -= difference;
produced_context.consume_and_replace_last_operation(updated_delete.clone());
updated_delete
}
}
}
}
impl<T> Display for Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Operation::Insert { index, text } => {
write!(
f,
"<insert '{}' from index {}>",
text.iter().map(Token::original).collect::<String>(),
index
)
}
Operation::Delete {
index,
deleted_character_count,
#[cfg(debug_assertions)]
deleted_text,
} => {
#[cfg(debug_assertions)]
write!(
f,
"<delete {} from index {}>",
deleted_text
.as_ref()
.map(|text| format!("'{text}'"))
.unwrap_or(format!("{deleted_character_count} characters")),
index
)?;
#[cfg(not(debug_assertions))]
write!(
f,
"<delete {deleted_character_count} characters from index {index}>",
)?;
Ok(())
}
}
}
}
impl<T> Debug for Operation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
#[should_panic(expected = "Shifted index must be non-negative")]
fn test_shifting_error() {
insta::assert_debug_snapshot!(
Operation::create_insert(1, vec!["hi".into()])
.unwrap()
.with_shifted_index(-2)
);
}
#[test]
fn test_apply_delete_with_create() {
let builder = StringBuilder::new("hello world");
let operation = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();
assert_eq!(operation.apply(builder).build(), "hello");
}
#[test]
fn test_apply_insert() {
let builder = StringBuilder::new("hello");
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
assert_eq!(operation.apply(builder).build(), "hello my friend");
}
}

View file

@ -0,0 +1,27 @@
---
source: reconcile/src/operation_transformation/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: <insert 'Hello, my friend!' from index 0>,
},
OrderedOperation {
order: 0,
operation: <delete 'hello world!' from index 17>,
},
OrderedOperation {
order: 20,
operation: <insert ' you doing? Albert' from index 25>,
},
OrderedOperation {
order: 20,
operation: <delete ' you? Adam' from index 43>,
},
],
cursors: [],
}

View file

@ -0,0 +1,61 @@
---
source: reconcile/src/operations/edited_text.rs
expression: operations
snapshot_kind: text
---
EditedText {
text: "hello world! How are you? Adam",
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -0,0 +1,60 @@
---
source: reconcile/src/operations/operation_sequence.rs
expression: operations
snapshot_kind: text
---
EditedText {
operations: [
OrderedOperation {
order: 0,
operation: Insert {
index: 0,
text: "Hello, my friend! ",
},
},
OrderedOperation {
order: 0,
operation: Delete {
index: 18,
deleted_character_count: 13,
deleted_text: Some(
"hello world! ",
),
},
},
OrderedOperation {
order: 21,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
"you? ",
),
},
},
OrderedOperation {
order: 26,
operation: Delete {
index: 26,
deleted_character_count: 5,
deleted_text: Some(
" Adam",
),
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 26,
text: "you ",
},
},
OrderedOperation {
order: 31,
operation: Insert {
index: 30,
text: "doing? Albert",
},
},
],
}

View file

@ -0,0 +1,6 @@
use token::Token;
pub mod token;
pub mod word_tokenizer;
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;

View file

@ -0,0 +1,6 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\"\")"
snapshot_kind: text
---
[]

View file

@ -0,0 +1,15 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\" what? \")"
snapshot_kind: text
---
[
Token {
normalised: "what?",
original: " what?",
},
Token {
normalised: "",
original: " ",
},
]

View file

@ -0,0 +1,23 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
snapshot_kind: text
---
[
Token {
normalised: "hello,",
original: " hello,",
},
Token {
normalised: "where",
original: " \nwhere",
},
Token {
normalised: "are",
original: " are",
},
Token {
normalised: "you?",
original: " you?",
},
]

View file

@ -0,0 +1,15 @@
---
source: reconcile/src/tokenizer/word_tokenizer.rs
expression: "word_tokenizer(\"Hi there!\")"
snapshot_kind: text
---
[
Token {
normalised: "Hi",
original: "Hi",
},
Token {
normalised: "there!",
original: " there!",
},
]

View file

@ -0,0 +1,44 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
/// A token is a string that has been normalised in some way.
/// The normalised form is used for comparison, while the original form is used
/// for applying Operations.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Token<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
normalised: T,
original: String,
}
impl From<&str> for Token<String> {
fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) }
}
impl<T> Token<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub fn new(normalised: T, original: String) -> Self {
Token {
normalised,
original,
}
}
pub fn original(&self) -> &str { &self.original }
pub fn normalised(&self) -> &T { &self.normalised }
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
}
impl<T> PartialEq for Token<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
fn eq(&self, other: &Self) -> bool { self.normalised == other.normalised }
}

View file

@ -0,0 +1,48 @@
use super::token::Token;
/// Splits on whitespace keeping the leading whitespace.
///
///
/// ## Example
///
/// "Hi there!" -> ["Hi", " there!"]
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
let mut result: Vec<Token<String>> = Vec::new();
let mut last_whitespace = 0;
let mut previous_char_is_whitespace = true;
for (i, c) in text.char_indices() {
let is_current_char_whitespace = c.is_whitespace();
if !previous_char_is_whitespace && is_current_char_whitespace {
result.push(text[last_whitespace..i].into());
last_whitespace = i;
}
previous_char_is_whitespace = is_current_char_whitespace;
}
if last_whitespace < text.len() {
result.push(text[last_whitespace..].into());
}
result
}
#[cfg(test)]
mod tests {
use insta::assert_debug_snapshot;
use super::*;
#[test]
fn test_with_snapshots() {
assert_debug_snapshot!(word_tokenizer("Hi there!"));
assert_debug_snapshot!(word_tokenizer(""));
assert_debug_snapshot!(word_tokenizer(" what? "));
assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?"));
}
}

View file

@ -0,0 +1,7 @@
pub mod common_prefix_len;
pub mod common_suffix_len;
pub mod find_longest_prefix_contained_within;
pub mod merge_iters;
pub mod ordered_operation;
pub mod side;
pub mod string_builder;

View file

@ -0,0 +1,47 @@
use core::ops::{Index, Range};
/// Given two lookups and ranges calculates the length of the common prefix.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_prefix_len<Old, New>(
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> usize
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
New::Output: PartialEq<Old::Output>,
{
new_range
.zip(old_range)
.take_while(|x| new[x.0] == old[x.1])
.count()
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_common_prefix_len() {
assert_eq!(
common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
7
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
0
);
assert_eq!(
common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
4
);
}
}

View file

@ -0,0 +1,48 @@
use core::ops::{Index, Range};
/// Given two lookups and ranges calculates the length of common suffix.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_suffix_len<Old, New>(
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> usize
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
New::Output: PartialEq<Old::Output>,
{
new_range
.rev()
.zip(old_range.rev())
.take_while(|x| new[x.0] == old[x.1])
.count()
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_common_suffix_len() {
assert_eq!(
common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
4
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
0
);
assert_eq!(
common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
2
);
}
}

View file

@ -0,0 +1,103 @@
use crate::Token;
/// Given two lists of tokens, returns `length` where `old` list somewhere
/// within contains the `length` prefix of the `new` list.
///
/// ## Example
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [9, 0, 2, 5, 1]
/// ```
/// > results in an length of 4
///
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 2]
/// ```
/// > results in an length of 2
///
/// ```not_rust
/// old: [0, 1, 9, 0, 2, 5]
/// new: [0, 4]
/// ```
/// > results in an length of 1
pub fn find_longest_prefix_contained_within<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
where
T: PartialEq + Clone + std::fmt::Debug,
{
let max_possible = new.len().min(old.len());
for len in (1..=max_possible).rev() {
let prefix = &new[..len];
if old.windows(len).any(|window| window == prefix) {
return len;
}
}
0
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_common_overlap() {
assert_eq!(
find_longest_prefix_contained_within(&["".into()], &["".into()]),
1
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into(), "c".into(), "a".into()]
),
2
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into(), "c".into()]
),
2
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["b".into()]
),
1
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into(), "b".into(), "a".into()],
&["b".into(), "a".into()]
),
2
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "a".into(), "a".into()],
&["a".into(), "b".into(), "c".into()]
),
1
);
assert_eq!(
find_longest_prefix_contained_within(
&["a".into(), "b".into(), "c".into()],
&["d".into(), "e".into(), "a".into()]
),
0
);
}
}

View file

@ -0,0 +1,86 @@
use core::{cmp::Ordering, iter::Peekable};
pub struct MergeAscending<L, R, F, O>
where
L: Iterator<Item = R::Item>,
R: Iterator,
F: Fn(&R::Item) -> O,
O: PartialOrd,
{
left: Peekable<L>,
right: Peekable<R>,
get_key: F,
}
impl<L, R, F, O> MergeAscending<L, R, F, O>
where
L: Iterator<Item = R::Item>,
R: Iterator,
F: Fn(&R::Item) -> O,
O: PartialOrd,
{
fn new(left: L, right: R, get_key: F) -> Self {
MergeAscending {
left: left.peekable(),
right: right.peekable(),
get_key,
}
}
}
impl<L, R, F, O> Iterator for MergeAscending<L, R, F, O>
where
L: Iterator<Item = R::Item>,
R: Iterator,
F: Fn(&R::Item) -> O,
O: PartialOrd,
{
type Item = L::Item;
fn next(&mut self) -> Option<L::Item> {
let order = match (self.left.peek(), self.right.peek()) {
(Some(l), Some(r)) => (self.get_key)(l).partial_cmp(&(self.get_key)(r)),
(Some(_), None) => Some(Ordering::Less),
(None, Some(_)) => Some(Ordering::Greater),
(None, None) => return None,
};
match order {
Some(Ordering::Less | Ordering::Equal) | None => self.left.next(),
Some(Ordering::Greater) => self.right.next(),
}
}
}
pub trait MergeSorted: Iterator {
fn merge_sorted_by_key<R, F, O>(self, other: R, get_key: F) -> MergeAscending<Self, R, F, O>
where
Self: Sized,
R: Iterator<Item = Self::Item>,
F: Fn(&Self::Item) -> O,
O: PartialOrd,
{
MergeAscending::new(self, other, get_key)
}
}
impl<T: ?Sized> MergeSorted for T where T: Iterator {}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_merge_sorted_by_key() {
let left = [9, 7, 5, 3, 1];
let right = [7, 6, 5, 4, 3];
let result: Vec<i32> = left
.into_iter()
.merge_sorted_by_key(right.into_iter(), |x| -1 * x)
.collect();
assert_eq!(result, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]);
}
}

View file

@ -0,0 +1,14 @@
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::operation_transformation::Operation;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq)]
pub struct OrderedOperation<T>
where
T: PartialEq + Clone + std::fmt::Debug,
{
pub order: usize,
pub operation: Operation<T>,
}

View file

@ -0,0 +1,16 @@
use std::fmt::Display;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Side {
Left,
Right,
}
impl Display for Side {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Side::Left => write!(f, "Left"),
Side::Right => write!(f, "Right"),
}
}
}

View file

@ -0,0 +1,111 @@
use core::ops::Range;
/// A helper for building a string in order based on an original string and a
/// series of insertions and deletions applied to it. It is safe to use with
/// UTF-8 strings as all operations are based on character indices.
#[derive(Debug, Clone)]
pub struct StringBuilder<'a> {
original: &'a str,
last_old_char_index: usize,
buffer: String,
}
impl StringBuilder<'_> {
pub fn new(original: &str) -> StringBuilder<'_> {
StringBuilder {
original,
last_old_char_index: 0,
buffer: String::with_capacity(original.len()),
}
}
/// Insert a string at the given index after copying the original string up
/// to that index from the last insertion or deletion.
pub fn insert(&mut self, from: usize, text: &str) {
self.copy_until(from);
self.buffer.push_str(text);
}
/// Delete a string at the given index after copying the original string up
/// to that index from the last insertion or deletion.
pub fn delete(&mut self, range: core::ops::Range<usize>) {
self.copy_until(range.start);
self.last_old_char_index += range.len();
}
fn copy_until(&mut self, index: usize) {
let current_char_count = self.buffer.chars().count();
debug_assert!(
index >= current_char_count,
"String builder only support building in order"
);
let jump = index - current_char_count;
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.take(jump)
.collect::<String>(),
);
self.last_old_char_index += jump;
}
/// Finish building the string after copying the remaining original string
/// since the last insertion or deletion.
pub fn build(mut self) -> String {
self.buffer.push_str(
&self
.original
.chars()
.skip(self.last_old_char_index)
.collect::<String>(),
);
self.buffer
}
#[allow(dead_code)]
pub fn get_slice(&self, range: Range<usize>) -> String {
let result = self
.buffer
.chars()
.chain(self.original.chars().skip(self.last_old_char_index))
.skip(range.start)
.take(range.end - range.start)
.collect::<String>();
debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",);
result
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_builder() {
let original = "aaa bbb ccc";
let mut builder = StringBuilder::new(original);
builder.insert(0, "ddd ");
builder.delete(4..8);
builder.insert(11, " eee");
assert_eq!(builder.build(), "ddd bbb ccc eee");
}
#[test]
fn test_string_builder2() {
let original = "abcde";
let mut builder = StringBuilder::new(original);
builder.delete(1..4);
assert_eq!(builder.build(), "ae");
}
}

View file

@ -0,0 +1,98 @@
use std::{fs, path::Path};
use pretty_assertions::assert_eq;
use reconcile::{CursorPosition, TextWithCursors};
use serde::Deserialize;
/// `ExampleDocument` represents a test case for the reconciliation process.
/// It contains a parent string, left and right strings with cursor positions,
/// and the expected result after reconciliation.
///
/// '|' characters in the left, right, and expected strings are treated as
/// cursor positions and are converted into `CursorPosition` objects.
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub struct ExampleDocument {
parent: String,
left: String,
right: String,
expected: String,
}
impl ExampleDocument {
/// Creates a new `ExampleDocument` instance from a YAML file.
///
/// # Panics
///
/// If the file cannot be opened or parsed, the program will panic.
#[must_use]
pub fn from_yaml(path: &Path) -> Self {
let file = fs::File::open(path).expect("Failed to open example file");
serde_yaml::from_reader(file).expect("Failed to parse example file")
}
#[must_use]
pub fn parent(&self) -> String { self.parent.clone() }
#[must_use]
pub fn left(&self) -> TextWithCursors<'static> {
ExampleDocument::string_to_text_with_cursors(&self.left)
}
#[must_use]
pub fn right(&self) -> TextWithCursors<'static> {
ExampleDocument::string_to_text_with_cursors(&self.right)
}
/// Asserts that the result string matches the expected string,
/// including cursor positions.
///
/// # Panics
///
/// If the result string does not match the expected string, the program
/// will panic.
pub fn assert_eq(&self, result: &TextWithCursors<'static>) {
let result_str = ExampleDocument::text_with_cursors_to_string(result);
assert_eq!(result_str, self.expected);
}
/// Asserts that the result string matches the expected string,
/// ignoring cursor positions.
///
/// # Panics
///
/// If the result string does not match the expected string, the program
/// will panic.
pub fn assert_eq_without_cursors(&self, result: &str) {
assert_eq!(
result,
ExampleDocument::string_to_text_with_cursors(&self.expected).text,
);
}
fn text_with_cursors_to_string(text: &TextWithCursors<'_>) -> String {
let mut result = text.text.clone().into_owned();
for (i, cursor) in text.cursors.iter().enumerate() {
result.insert(cursor.char_index + i, '|');
}
result
}
fn string_to_text_with_cursors(text: &str) -> TextWithCursors<'static> {
let cursors = Self::parse_cursors(text);
let text = text.replace('|', "");
TextWithCursors::new_owned(text, cursors)
}
fn parse_cursors(text: &str) -> Vec<CursorPosition> {
let mut cursors = Vec::new();
for (i, c) in text.chars().enumerate() {
if c == '|' {
cursors.push(CursorPosition {
id: 0,
char_index: i - cursors.len(),
});
}
}
cursors
}
}

View file

@ -0,0 +1,6 @@
# The `|` characters denote cursor positions which are stripped before the actual reconcile logic is run
---
parent: You're Annual Savings Statement is available in our online portal
left: Your| annual record is available in our online portal|
right: You're Annual Savings information| is available online
expected: Your| annual record information| is available online|

View file

@ -0,0 +1,4 @@
parent: marketplace
left: market| place
right: market|space
expected: market| placemarket|space

View file

@ -0,0 +1,4 @@
parent: Please remember to bring your laptop and charger
left: Please remember to bring your laptop|
right: Please remember to bring your |new |laptop and charger
expected: Please remember to bring your |new |laptop|

View file

@ -0,0 +1,4 @@
parent: Party A shall pay Party B
left: Party C shall pay Party B
right: Party A shall receive from Party B
expected: Party C shall receive from Party B

View file

@ -0,0 +1,4 @@
parent: Please submit your assignment by Friday
left: Please submit your |completed |assignment by Friday
right: Please submit your assignment |online |by Friday
expected: Please submit your |completed |assignment |online |by Friday

View file

@ -0,0 +1,4 @@
parent:
left: hi my friend|
right: hi there|
expected: hi my friend| there|

View file

@ -0,0 +1,4 @@
parent: Buy milk and eggs
left: Buy organic milk| and eggs|
right: Buy milk and eggs| and bread
expected: Buy organic milk| and eggs|| and bread

View file

@ -0,0 +1,4 @@
parent: Meeting at 2pm in 会议室
left: Meeting at |3pm in the 会议室
right: Team meeting at 2pm in conference room|
expected: Team meeting at |3pm in conference room| the

View file

@ -0,0 +1,4 @@
parent: Send the report to the team
left: Send the |detailed |report to the |entire |team
right: Send the |quarterly |detailed |report to the team
expected: Send the |detailed |quarterly |detailed ||report to the |entire |team

View file

@ -0,0 +1,4 @@
parent: Ready, Set go
left: Ready! Set go|
right: Ready, Set, go!|
expected: Ready! Set, go!||

View file

@ -0,0 +1,4 @@
parent: "Total: $100"
left: "Total: |$150"
right: "Total: |€100"
expected: "Total: |$150 |€100"

View file

@ -0,0 +1,4 @@
parent: Start middle end
left: Start [important] middle end|
right: Start middle [critical] end|
expected: Start [important] middle [critical] end||

View file

@ -0,0 +1,4 @@
parent: A B C D
left: A X B D|
right: A B Y|
expected: A X B Y||

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,46 @@
mod example_document;
use std::{fs, path::Path};
use example_document::ExampleDocument;
use reconcile::{reconcile, reconcile_with_cursors};
#[test]
fn test_with_examples() {
let examples_dir = Path::new("tests/examples");
let mut entries = fs::read_dir(examples_dir)
.expect("Failed to read examples directory")
.collect::<Vec<_>>();
entries.sort_by_key(|entry| {
let path = entry
.as_ref()
.expect("Failed to read directory entry")
.path();
path.file_name()
.and_then(|name| name.to_str())
.and_then(|name| name.split('.').next().unwrap().parse::<i32>().ok())
.unwrap_or_default()
});
for entry in entries {
let entry = entry.expect("Failed to read directory entry");
let path = entry.path();
if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("yml") {
let doc = ExampleDocument::from_yaml(&path);
println!("Testing with example from {}", path.display());
doc.assert_eq_without_cursors(&reconcile(
&doc.parent(),
&doc.left().text,
&doc.right().text,
));
doc.assert_eq(&reconcile_with_cursors(
&doc.parent(),
doc.left(),
doc.right(),
));
}
}
}

View file

@ -0,0 +1,4 @@
[toolchain]
channel = "nightly-2025-03-14"
targets = [ "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl" ]
profile = "default"

8
backend/rustfmt.toml Normal file
View file

@ -0,0 +1,8 @@
imports_granularity = "crate"
condense_wildcard_suffixes = true
fn_single_line = true
format_strings = true
reorder_impl_items = true
group_imports = "StdExternalCrate"
use_field_init_shorthand = true
wrap_comments=true

View file

@ -0,0 +1,32 @@
[package]
name = "sync_lib"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
base64 = "0.22.1"
reconcile = { path = "../reconcile" }
wasm-bindgen = "0.2.99"
thiserror = { workspace = true }
# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires
# all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
# code size when deploying.
console_error_panic_hook = { version = "0.1.7", optional = true }
[dev-dependencies]
wasm-bindgen-test = "0.3.49"
insta = "1.42.2"
[features]
default = ["console_error_panic_hook"]
[lints]
workspace = true

View file

@ -0,0 +1,88 @@
use wasm_bindgen::prelude::*;
/// Wrapper type to expose `TextWithCursors` to JS.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct TextWithCursors {
text: String,
cursors: Vec<CursorPosition>,
}
#[wasm_bindgen]
impl TextWithCursors {
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self { Self { text, cursors } }
#[must_use]
pub fn text(&self) -> String { self.text.clone() }
#[must_use]
pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
}
impl From<TextWithCursors> for reconcile::TextWithCursors<'_> {
fn from(owned: TextWithCursors) -> Self {
reconcile::TextWithCursors::new_owned(
owned.text.to_string(),
owned
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
)
}
}
impl From<reconcile::TextWithCursors<'_>> for TextWithCursors {
fn from(text_with_cursors: reconcile::TextWithCursors<'_>) -> Self {
TextWithCursors {
text: text_with_cursors.text.into_owned(),
cursors: text_with_cursors
.cursors
.into_iter()
.map(std::convert::Into::into)
.collect(),
}
}
}
/// Wrapper type to expose `CursorPosition` to JS.
#[wasm_bindgen]
#[derive(Debug, Clone, PartialEq)]
pub struct CursorPosition {
id: usize,
char_index: usize,
}
#[wasm_bindgen]
impl CursorPosition {
#[wasm_bindgen(constructor)]
#[must_use]
pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }
#[must_use]
pub fn id(&self) -> usize { self.id }
#[wasm_bindgen(js_name = characterPosition)]
#[must_use]
pub fn char_index(&self) -> usize { self.char_index }
}
impl From<CursorPosition> for reconcile::CursorPosition {
fn from(owned: CursorPosition) -> Self {
reconcile::CursorPosition {
id: owned.id,
char_index: owned.char_index,
}
}
}
impl From<reconcile::CursorPosition> for CursorPosition {
fn from(cursor: reconcile::CursorPosition) -> Self {
CursorPosition {
id: cursor.id,
char_index: cursor.char_index,
}
}
}

View file

@ -0,0 +1,29 @@
use base64::DecodeError;
use thiserror::Error;
use wasm_bindgen::JsValue;
#[derive(Error, Debug)]
pub enum SyncLibError {
#[error("Base64 decoding error because of {}", .reason)]
Base64DecodingError { reason: String },
}
impl From<DecodeError> for SyncLibError {
fn from(e: DecodeError) -> Self {
SyncLibError::Base64DecodingError {
reason: e.to_string(),
}
}
}
impl From<std::string::FromUtf8Error> for SyncLibError {
fn from(e: std::string::FromUtf8Error) -> Self {
SyncLibError::Base64DecodingError {
reason: e.to_string(),
}
}
}
impl From<SyncLibError> for JsValue {
fn from(val: SyncLibError) -> Self { JsValue::from_str(&val.to_string()) }
}

152
backend/sync_lib/src/lib.rs Normal file
View file

@ -0,0 +1,152 @@
//! This crate provides utilities for easily communicating between backend &
//! frontend and ensuring the same logic for encoding and decoding binary data,
//! and 3-way-merging documents in Rust and JavaScript.
//!
//! The crate is designed to be used as a Rust library and as a
//! TypeScript/JavaScript package through WebAssembly (WASM).
//!
//! # Modules
//!
//! - `errors`: Contains error types used in this crate.
use core::str;
use base64::{Engine as _, engine::general_purpose::STANDARD};
use cursor::TextWithCursors;
use errors::SyncLibError;
use wasm_bindgen::prelude::*;
pub mod cursor;
pub mod errors;
/// Encode binary data for easy transport over HTTP. Inverse of
/// `base64_to_bytes`.
///
/// # Arguments
///
/// - `input`: The binary data to encode.
///
/// # Returns
///
/// The base64-encoded string.
///
/// # Panics
///
/// If the input is not valid UTF-8.
#[wasm_bindgen(js_name = bytesToBase64)]
#[must_use]
pub fn bytes_to_base64(input: &[u8]) -> String {
set_panic_hook();
STANDARD.encode(input)
}
/// Inverse of `bytes_to_base64`.
/// Decode base64-encoded data into binary data.
///
/// # Arguments
///
/// - `input`: The base64-encoded string.
///
/// # Returns
///
/// The decoded binary data.
///
/// # Errors
///
/// If the input is not valid base64.
#[wasm_bindgen(js_name = base64ToBytes)]
pub fn base64_to_bytes(input: &str) -> Result<Vec<u8>, SyncLibError> {
set_panic_hook();
STANDARD.decode(input).map_err(SyncLibError::from)
}
/// Merge two documents with a common parent. Relies on `reconcile::reconcile`
/// for texts and returns the right document as-is if either of the updated
/// documents is binary.
///
/// # Arguments
///
/// - `parent`: The common parent document.
/// - `left`: The left document updated by one user.
/// - `right`: The right document updated by another user.
///
/// # Returns
///
/// The merged document.
///
/// # Panics
///
/// If any of the input documents are not valid UTF-8 strings.
#[wasm_bindgen]
#[must_use]
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
set_panic_hook();
if is_binary(parent) || is_binary(left) || is_binary(right) {
right.to_vec()
} else {
reconcile::reconcile(
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary"),
str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary"),
)
.into_bytes()
}
}
/// WASM wrapper around `reconcile::reconcile` for merging text.
#[wasm_bindgen(js_name = mergeText)]
#[must_use]
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
set_panic_hook();
reconcile::reconcile(parent, left, right)
}
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
#[wasm_bindgen(js_name = mergeTextWithCursors)]
#[must_use]
pub fn merge_text_with_cursors(
parent: &str,
left: TextWithCursors,
right: TextWithCursors,
) -> TextWithCursors {
set_panic_hook();
reconcile::reconcile_with_cursors(parent, left.into(), right.into()).into()
}
/// Heuristically determine if the given data is a binary or a text file's
/// content.
#[wasm_bindgen(js_name = isBinary)]
#[must_use]
pub fn is_binary(data: &[u8]) -> bool {
set_panic_hook();
if data.contains(&0) {
// Even though the NUL character is valid in UTF-8, it's highly suspicious in
// human-readable text.
return true;
}
std::str::from_utf8(data).is_err()
}
/// We don't want to support merging structured data like JSON, YAML, etc.
#[wasm_bindgen(js_name = isFileTypeMergable)]
#[must_use]
pub fn is_file_type_mergable(path_or_file_name: &str) -> bool {
set_panic_hook();
let file_extension = path_or_file_name.split('.').next_back().unwrap_or_default();
matches!(file_extension.to_lowercase().as_str(), "md" | "txt")
}
fn set_panic_hook() {
// https://github.com/rustwasm/console_error_panic_hook#readme
#[cfg(feature = "console_error_panic_hook")]
console_error_panic_hook::set_once();
}

View file

@ -0,0 +1,10 @@
---
source: sync_lib/tests/web.rs
expression: base64_to_bytes(input)
snapshot_kind: text
---
Err(
Base64DecodingError {
reason: "Invalid symbol 61, offset 0.",
},
)

View file

@ -0,0 +1,99 @@
use insta::assert_debug_snapshot;
use sync_lib::{
cursor::{CursorPosition, TextWithCursors},
*,
};
use wasm_bindgen_test::*;
#[wasm_bindgen_test(unsupported = test)]
fn test_bytes_to_base64() {
let input = b"hello";
let expected = "aGVsbG8=";
assert_eq!(bytes_to_base64(input), expected);
}
#[wasm_bindgen_test(unsupported = test)]
fn test_base64_to_bytes() {
let input = "aGVsbG8=";
let expected = b"hello".to_vec();
assert_eq!(base64_to_bytes(input).unwrap(), expected);
}
#[test] // insta doesn't support wasm-bindgen-test
fn test_base64_to_bytes_error() {
let input = "===";
assert_debug_snapshot!(base64_to_bytes(input));
}
#[wasm_bindgen_test(unsupported = test)]
fn test_merge() {
let left = b"hello ";
let right = b"world";
let result = merge(b"", left, right);
assert_eq!(result, b"hello world");
let left = b"\0binary";
let right = b"other";
let result = merge(b"", left, right);
assert_eq!(result, right);
}
#[wasm_bindgen_test(unsupported = test)]
fn test_merge_text() {
let left = "hello ";
let right = "world";
let result = merge_text("", left, right);
assert_eq!(result, "hello world");
}
#[wasm_bindgen_test(unsupported = test)]
fn test_merge_text_with_cursors() {
let result = merge_text_with_cursors(
"hi",
TextWithCursors::new("hi world".to_owned(), vec![]),
TextWithCursors::new(
"hi".to_owned(),
vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)],
),
);
assert_eq!(
result,
TextWithCursors::new(
"hi world".to_owned(),
vec![CursorPosition::new(0, 1), CursorPosition::new(1, 2)]
),
);
}
#[wasm_bindgen_test(unsupported = test)]
fn merge_binary() {
let left = [0, 1, 2];
let right = [3, 4, 5];
assert_eq!(merge(b"", &left, &right), right);
}
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary() {
assert!(is_binary(&[0, 159, 146, 150]));
assert!(is_binary(&[0, 12]));
assert!(!is_binary(b"hello"));
}
#[wasm_bindgen_test(unsupported = test)]
fn test_is_binary_empty() {
assert!(!is_binary(b""));
}
#[wasm_bindgen_test(unsupported = test)]
fn test_is_file_type_mergable() {
assert!(is_file_type_mergable(".md"));
assert!(is_file_type_mergable("hi.md"));
assert!(is_file_type_mergable("my/path/to/my/document.md"));
assert!(is_file_type_mergable("hi.MD"));
assert!(is_file_type_mergable("my/path/to/my/DOCUMENT.MD"));
assert!(!is_file_type_mergable(".json"));
assert!(!is_file_type_mergable("HELLO.JSON"));
assert!(!is_file_type_mergable("my/config.yml"));
}

View file

@ -0,0 +1,42 @@
[package]
name = "sync_server"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
[dependencies]
reconcile = { path = "../reconcile" }
sync_lib = { path = "../sync_lib" }
serde = { workspace = true }
thiserror = { workspace = true }
tokio = { version = "1.44.1", features = ["full"]}
uuid = { version = "1.16.0", features = ["v4", "serde"] }
log = { version = "0.4.22" }
anyhow = { version = "1.0.97", features = ["backtrace"] }
axum = { version = "0.7.4", features = ["ws", "macros", "tracing", "multipart"]}
axum-extra = { version = "0.9.6", features = ["typed-header"] }
aide-axum-typed-multipart = "0.13.0"
axum_typed_multipart = "0.11.0"
tower-http = { version = "0.6.1", features = ["cors", "trace", "limit"] }
tracing-subscriber = { version = "0.3.19", features = ["fmt", "env-filter"]}
serde_yaml = "0.9.34"
sqlx = { version = "0.8.3", features = ["sqlite", "runtime-tokio", "uuid", "chrono"] }
chrono = { version = "0.4.40", features = ["serde"] }
aide = { version = "0.13.4", features = ["axum", "axum-ws", "scalar", "axum-headers"] }
schemars = { version = "0.8.21", features = ["chrono", "uuid1", "bytes"] }
tracing = "0.1.41"
rand = "0.8.5"
sanitize-filename = "0.6.0"
axum-jsonschema = { version = "0.8.0", features = ["aide"] }
regex = "1.11.1"
clap = { version = "4.5.32", features = ["derive"] }
futures = "0.3.31"
serde_json = "1.0.140"
clap-verbosity-flag = "3.0.2"
[lints]
workspace = true

View file

@ -0,0 +1,4 @@
cargo install sqlx-cli
rm db.sqlite3; sqlx database create --database-url sqlite://db.sqlite3
sqlx migrate run --source sync_server/src/app_state/database/migrations --database-url sqlite://db.sqlite3

View file

@ -0,0 +1,34 @@
pub mod broadcasts;
pub mod database;
use std::ffi::OsString;
use anyhow::Result;
use broadcasts::Broadcasts;
use database::Database;
use crate::{config::Config, consts::DEFAULT_CONFIG_PATH};
#[derive(Clone, Debug)]
pub struct AppState {
pub config: Config,
pub database: Database,
pub broadcasts: Broadcasts,
}
impl AppState {
pub async fn try_new(config_path: Option<OsString>) -> Result<Self> {
let config_path = config_path.unwrap_or_else(|| OsString::from(DEFAULT_CONFIG_PATH));
let path = std::path::PathBuf::from(config_path);
let config = Config::read_or_create(&path).await?;
let database = Database::try_new(&config.database).await?;
let broadcasts = Broadcasts::new(&config.server);
Ok(Self {
config,
database,
broadcasts,
})
}
}

View file

@ -0,0 +1,57 @@
use std::{collections::HashMap, sync::Arc};
use anyhow::Context;
use tokio::sync::{Mutex, broadcast};
use super::database::models::{DocumentVersionWithoutContent, VaultId};
use crate::{config::server_config::ServerConfig, errors::server_error};
#[derive(Debug, Clone)]
pub struct Broadcasts {
max_clients_per_vault: usize,
tx: Arc<Mutex<HashMap<VaultId, broadcast::Sender<DocumentVersionWithoutContent>>>>,
}
impl Broadcasts {
pub fn new(server_config: &ServerConfig) -> Self {
Self {
max_clients_per_vault: server_config.max_clients_per_vault,
tx: Arc::new(Mutex::new(HashMap::new())),
}
}
pub async fn get_receiver(
&self,
vault: VaultId,
) -> broadcast::Receiver<DocumentVersionWithoutContent> {
let tx = self.get_or_create(vault).await;
tx.subscribe()
}
/// Sent a document update to all clients subscribed to the vault.
/// We ignore & log failures.
pub async fn send(&self, vault: VaultId, document: DocumentVersionWithoutContent) {
let tx = self.get_or_create(vault).await;
let result = tx
.send(document)
.context("Cannot broadcast update message to websocket listeners")
.map_err(server_error);
if result.is_err() {
log::debug!("Failed to send message: {result:?}");
}
}
async fn get_or_create(
&self,
vault: VaultId,
) -> broadcast::Sender<DocumentVersionWithoutContent> {
let mut tx = self.tx.lock().await;
tx.entry(vault)
.or_insert_with(|| broadcast::channel(self.max_clients_per_vault).0.clone())
.clone()
}
}

View file

@ -2,62 +2,38 @@ use core::time::Duration;
use std::{collections::HashMap, sync::Arc};
use anyhow::{Context as _, Result};
use log::info;
use models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId, VaultUpdateId,
};
use sqlx::{ConnectOptions, sqlite::SqliteConnectOptions, types::chrono::Utc};
use sqlx::{sqlite::SqliteConnectOptions, types::chrono::Utc};
pub mod models;
use sqlx::{Pool, Sqlite, sqlite::SqlitePoolOptions};
use tokio::sync::Mutex;
use tokio::time::Instant;
use uuid::fmt::Hyphenated;
use super::websocket::{
broadcasts::Broadcasts,
models::{WebSocketServerMessage, WebSocketServerMessageWithOrigin, WebSocketVaultUpdate},
};
use crate::config::database_config::DatabaseConfig;
#[derive(Clone)]
struct PoolWithTimestamp {
pool: Pool<Sqlite>,
last_accessed: Instant,
}
impl std::fmt::Debug for PoolWithTimestamp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PoolWithTimestamp")
.field("pool", &"Pool<Sqlite>")
.field("last_accessed", &self.last_accessed)
.finish()
}
}
#[derive(Clone, Debug)]
pub struct Database {
config: DatabaseConfig,
broadcasts: Broadcasts,
connection_pools: Arc<Mutex<HashMap<VaultId, PoolWithTimestamp>>>,
connection_pools: Arc<Mutex<HashMap<VaultId, Pool<Sqlite>>>>,
}
pub type Transaction<'a> = sqlx::Transaction<'a, Sqlite>;
impl Database {
pub async fn try_new(config: &DatabaseConfig, broadcasts: &Broadcasts) -> Result<Self> {
pub async fn try_new(config: &DatabaseConfig) -> Result<Self> {
tokio::fs::create_dir_all(&config.databases_directory_path)
.await
.with_context(|| {
format!(
"Failed to create databases directory at `{}`",
"Failed to create databases directory: {}",
config.databases_directory_path.to_string_lossy()
)
})?;
let mut connection_pools = std::collections::HashMap::new();
info!("Applying pending database migrations");
let mut entries = tokio::fs::read_dir(&config.databases_directory_path).await?;
while let Some(entry) = entries.next_entry().await? {
if !entry.file_name().to_string_lossy().ends_with(".sqlite") {
@ -70,26 +46,16 @@ impl Database {
.trim_end_matches(".sqlite")
.to_owned();
let pool = Self::create_vault_database(config, &vault).await?;
connection_pools.insert(
vault.clone(),
PoolWithTimestamp {
pool,
last_accessed: Instant::now(),
},
Self::create_vault_database(config, &vault).await?,
);
}
let database = Self {
Ok(Self {
config: config.clone(),
connection_pools: Arc::new(Mutex::new(connection_pools)),
broadcasts: broadcasts.clone(),
};
// Start background task to cleanup idle connection pools
database.start_idle_pool_cleanup();
Ok(database)
})
}
async fn create_vault_database(
@ -103,18 +69,15 @@ impl Database {
let connection_options = SqliteConnectOptions::new()
.filename(file_name.clone())
.create_if_missing(true)
.auto_vacuum(sqlx::sqlite::SqliteAutoVacuum::Full)
.busy_timeout(Duration::from_secs(3600))
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
.log_slow_statements(log::LevelFilter::Warn, Duration::from_secs(30));
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal);
let pool = SqlitePoolOptions::new()
.max_connections(config.max_connections_per_vault)
.acquire_slow_threshold(Duration::from_secs(30))
.test_before_acquire(true)
.connect_with(connection_options)
.await
.with_context(|| format!("Cannot open database at `{}`", file_name.display()))?;
.with_context(|| format!("Cannot open database at {}", file_name.display()))?;
Self::run_migrations(&pool).await?;
@ -130,26 +93,16 @@ impl Database {
async fn get_connection_pool(&self, vault: &VaultId) -> Result<Pool<Sqlite>> {
let mut pools = self.connection_pools.lock().await;
if !pools.contains_key(vault) {
let pool = Self::create_vault_database(&self.config, vault).await?;
pools.insert(
vault.clone(),
PoolWithTimestamp {
pool,
last_accessed: Instant::now(),
},
);
pools.insert(vault.clone(), pool);
}
let pool_with_timestamp = pools
.get_mut(vault)
let pool = pools
.get(vault)
.expect("Pool was just inserted or already exists");
// Update last accessed time
pool_with_timestamp.last_accessed = Instant::now();
Ok(pool_with_timestamp.pool.clone())
Ok(pool.clone())
}
/// Attempting to write from this transaction might result in a
@ -182,19 +135,17 @@ impl Database {
vault: &VaultId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
let query = sqlx::query!(
let query = sqlx::query_as!(
DocumentVersionWithoutContent,
r#"
select
select
vault_update_id,
document_id as "document_id: Hyphenated",
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
is_deleted,
user_id,
device_id,
length(content) as "content_size: u64"
is_deleted
from latest_document_versions
order by vault_update_id
order by vault_update_id desc
"#,
);
@ -206,22 +157,6 @@ impl Database {
.await
}
.context("Cannot fetch latest documents")
.map(|rows| {
rows.into_iter()
.map(|row| DocumentVersionWithoutContent {
vault_update_id: row.vault_update_id,
document_id: row.document_id.into(),
relative_path: row.relative_path,
updated_date: row.updated_date,
is_deleted: row.is_deleted,
user_id: row.user_id,
device_id: row.device_id,
content_size: row
.content_size
.expect("Content size can't be null but sqlx can't infer it"),
})
.collect()
})
}
/// Return the latest state of all documents (including deleted) in the
@ -232,20 +167,18 @@ impl Database {
vault_update_id: VaultUpdateId,
transaction: Option<&mut Transaction<'_>>,
) -> Result<Vec<DocumentVersionWithoutContent>> {
let query = sqlx::query!(
let query = sqlx::query_as!(
DocumentVersionWithoutContent,
r#"
select
vault_update_id,
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
is_deleted,
user_id,
device_id,
length(content) as "content_size: u64"
is_deleted
from latest_document_versions
where vault_update_id > ?
order by vault_update_id
order by vault_update_id desc
"#,
vault_update_id
);
@ -258,23 +191,7 @@ impl Database {
.await
}
.with_context(|| {
format!("Cannot fetch latest documents since vault_update_id `{vault_update_id}`")
})
.map(|rows| {
rows.into_iter()
.map(|row| DocumentVersionWithoutContent {
vault_update_id: row.vault_update_id,
document_id: row.document_id.into(),
relative_path: row.relative_path,
updated_date: row.updated_date,
is_deleted: row.is_deleted,
user_id: row.user_id,
device_id: row.device_id,
content_size: row
.content_size
.expect("Content size can't be null but sqlx can't infer it"),
})
.collect()
format!("Cannot fetch latest documents since vault_update_id {vault_update_id}")
})
}
@ -310,21 +227,18 @@ impl Database {
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
select
vault_update_id,
document_id as "document_id: Hyphenated",
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id,
has_been_merged
is_deleted
from latest_document_versions
where relative_path = ? and is_deleted = false
where relative_path = ?
order by vault_update_id desc -- `latest_document_versions` only contains a single latest version of each document, however,
-- multiple documents can have the same `relative_path`, if they have been deleted. That's
-- why we only care about the latest version of the document with the given relative path.
-- multiple documents can have the same `relative_path`, if they have been deleted. That's
-- why we only care about the latest version of the document with the given relative path.
limit 1
"#,
relative_path
@ -350,16 +264,13 @@ impl Database {
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
select
vault_update_id,
document_id as "document_id: Hyphenated",
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id,
has_been_merged
is_deleted
from latest_document_versions
where document_id = ?
"#,
@ -385,16 +296,13 @@ impl Database {
let query = sqlx::query_as!(
StoredDocumentVersion,
r#"
select
select
vault_update_id,
document_id as "document_id: Hyphenated",
document_id as "document_id: Hyphenated",
relative_path,
updated_date as "updated_date: chrono::DateTime<Utc>",
content,
is_deleted,
user_id,
device_id,
has_been_merged
is_deleted
from documents
where vault_update_id = ?"#,
vault_update_id
@ -410,106 +318,40 @@ impl Database {
.context("Cannot fetch document version")
}
// inserting the document must be the last step of the transaction if there's one
pub async fn insert_document_version(
&self,
vault_id: &VaultId,
vault: &VaultId,
version: &StoredDocumentVersion,
transaction: Option<Transaction<'_>>,
transaction: Option<&mut Transaction<'_>>,
) -> Result<()> {
let document_id = version.document_id.as_hyphenated();
let query = sqlx::query!(
r#"
insert into documents (
vault_update_id,
document_id,
document_id,
relative_path,
updated_date,
content,
is_deleted,
user_id,
device_id
is_deleted
)
values (?, ?, ?, ?, ?, ?, ?, ?)
values (?, ?, ?, ?, ?, ?)
"#,
version.vault_update_id,
document_id,
version.relative_path,
version.updated_date,
version.content,
version.is_deleted,
version.user_id,
version.device_id
version.is_deleted
);
if let Some(mut transaction) = transaction {
query
.execute(&mut *transaction)
.await
.context("Cannot insert document version")?;
transaction
.commit()
.await
.context("Failed to commit transaction")?;
if let Some(transaction) = transaction {
query.execute(&mut **transaction).await
} else {
query
.execute(&self.get_connection_pool(vault_id).await?)
.await
.context("Cannot insert document version")?;
query.execute(&self.get_connection_pool(vault).await?).await
}
self.broadcasts
.send_document_update(
vault_id.clone(),
WebSocketServerMessageWithOrigin::with_origin(
version.device_id.clone(),
WebSocketServerMessage::VaultUpdate(WebSocketVaultUpdate {
documents: vec![version.clone().into()],
is_initial_sync: false,
}),
),
)
.await;
.context("Cannot insert document version")?;
Ok(())
}
/// Cleanup idle connection pools that haven't been accessed in more than 5 minutes
async fn cleanup_idle_pools(&self) {
let mut pools = self.connection_pools.lock().await;
let now = Instant::now();
let idle_timeout = Duration::from_secs(5 * 60); // 5 minutes
// Collect vaults to remove
let vaults_to_remove: Vec<VaultId> = pools
.iter()
.filter(|(_, pool_with_timestamp)| {
now.duration_since(pool_with_timestamp.last_accessed) > idle_timeout
})
.map(|(vault_id, _)| vault_id.clone())
.collect();
// Close and remove idle pools
for vault_id in &vaults_to_remove {
if let Some(pool_with_timestamp) = pools.remove(vault_id) {
info!("Closing idle database connection pool for vault `{vault_id}`");
pool_with_timestamp.pool.close().await;
}
}
}
/// Start a background task that periodically cleans up idle connection pools
fn start_idle_pool_cleanup(&self) {
let database = self.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(60)); // Check every minute
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
interval.tick().await;
database.cleanup_idle_pools().await;
}
});
}
}

View file

@ -1,14 +1,11 @@
use base64::{Engine as _, engine::general_purpose::STANDARD};
use chrono::{DateTime, Utc};
use schemars::JsonSchema;
use serde::Serialize;
use ts_rs::TS;
use sync_lib::bytes_to_base64;
pub type VaultId = String;
pub type VaultUpdateId = i64;
pub type DocumentId = uuid::Uuid;
pub type UserId = String;
pub type DeviceId = String;
#[derive(Debug, Clone)]
pub struct StoredDocumentVersion {
@ -18,33 +15,20 @@ pub struct StoredDocumentVersion {
pub updated_date: DateTime<Utc>,
pub content: Vec<u8>,
pub is_deleted: bool,
pub user_id: UserId,
pub device_id: DeviceId,
#[allow(dead_code)] // This is for manual analysis
pub has_been_merged: bool,
}
impl PartialEq<Self> for StoredDocumentVersion {
fn eq(&self, other: &Self) -> bool {
self.vault_update_id == other.vault_update_id
}
fn eq(&self, other: &Self) -> bool { self.vault_update_id == other.vault_update_id }
}
#[derive(TS, Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DocumentVersionWithoutContent {
#[ts(as = "i32")]
pub vault_update_id: VaultUpdateId,
pub document_id: DocumentId,
pub relative_path: String,
pub updated_date: DateTime<Utc>,
pub is_deleted: bool,
pub user_id: UserId,
pub device_id: DeviceId,
#[ts(as = "i32")]
pub content_size: u64,
}
impl From<StoredDocumentVersion> for DocumentVersionWithoutContent {
@ -55,26 +39,19 @@ impl From<StoredDocumentVersion> for DocumentVersionWithoutContent {
relative_path: value.relative_path,
updated_date: value.updated_date,
is_deleted: value.is_deleted,
user_id: value.user_id,
device_id: value.device_id,
content_size: value.content.len() as u64,
}
}
}
#[derive(TS, Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DocumentVersion {
#[ts(as = "i32")]
pub vault_update_id: VaultUpdateId,
pub document_id: DocumentId,
pub relative_path: String,
pub updated_date: DateTime<Utc>,
pub content_base64: String,
pub is_deleted: bool,
pub user_id: UserId,
pub device_id: DeviceId,
}
impl From<StoredDocumentVersion> for DocumentVersion {
@ -84,10 +61,8 @@ impl From<StoredDocumentVersion> for DocumentVersion {
document_id: value.document_id,
relative_path: value.relative_path,
updated_date: value.updated_date,
content_base64: STANDARD.encode(&value.content),
content_base64: bytes_to_base64(&value.content),
is_deleted: value.is_deleted,
user_id: value.user_id,
device_id: value.device_id,
}
}
}

View file

@ -1,6 +1,7 @@
use std::ffi::OsString;
use clap::Parser;
use clap_verbosity_flag::{InfoLevel, Verbosity};
use crate::cli::color_when::ColorWhen;
@ -11,6 +12,9 @@ pub struct Args {
#[arg(index = 1)]
pub config_path: Option<OsString>,
#[command(flatten)]
pub verbose: Verbosity<InfoLevel>,
#[arg(
long,
value_name = "WHEN",

View file

@ -2,15 +2,13 @@ use std::path::Path;
use anyhow::{Context as _, Result};
use database_config::DatabaseConfig;
use log::info;
use logging_config::LoggingConfig;
use log::{info, warn};
use serde::{Deserialize, Serialize};
use server_config::ServerConfig;
use tokio::fs;
use user_config::UserConfig;
pub mod database_config;
pub mod logging_config;
pub mod server_config;
pub mod user_config;
@ -22,40 +20,38 @@ pub struct Config {
pub server: ServerConfig,
#[serde(default)]
pub users: UserConfig,
#[serde(default)]
pub logging: LoggingConfig,
}
impl Config {
pub async fn read_or_create(path: &Path) -> Result<Self> {
let config = if path.exists() {
if path.exists() {
info!(
"Loading configuration from `{}`",
path.canonicalize().unwrap().display()
"Loading configuration from {:?}",
path.canonicalize().unwrap()
);
Self::load_from_file(path).await?
Self::load_from_file(path).await
} else {
Self::default()
};
config.write(path).await?;
info!(
"Updated configuration at `{}`",
path.canonicalize().unwrap().display()
);
Ok(config)
let config = Self::default();
config.write(path).await?;
warn!(
"Configuration file not found, wrote default configuration to {:?}",
path.canonicalize().unwrap()
);
Ok(config)
}
}
pub async fn load_from_file(path: &Path) -> Result<Self> {
let contents = fs::read_to_string(path).await.with_context(|| {
format!(
"Cannot load configuration from disk from `{}`",
"Cannot load configuration from disk from {}",
path.display()
)
})?;
serde_yaml::from_str(&contents).context("Failed to parse configuration")
let config = serde_yaml::from_str(&contents).context("Failed to parse configuration")?;
Ok(config)
}
pub async fn write(&self, path: &Path) -> Result<()> {

View file

@ -1,11 +1,9 @@
use std::{path::PathBuf, time::Duration};
use std::path::PathBuf;
use log::debug;
use serde::{Deserialize, Serialize};
use crate::consts::{
DEFAULT_CURSOR_TIMEOUT, DEFAULT_DATABASES_DIRECTORY_PATH, DEFAULT_MAX_CONNECTIONS_PER_VAULT,
};
use crate::consts::{DEFAULT_DATABASES_DIRECTORY_PATH, DEFAULT_MAX_CONNECTIONS_PER_VAULT};
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct DatabaseConfig {
@ -14,9 +12,6 @@ pub struct DatabaseConfig {
#[serde(default = "default_max_connections_per_vault")]
pub max_connections_per_vault: u32,
#[serde(default = "default_cursor_timeout", with = "humantime_serde")]
pub cursor_timeout: Duration,
}
fn default_databases_directory_path() -> PathBuf {
@ -29,17 +24,11 @@ fn default_max_connections_per_vault() -> u32 {
DEFAULT_MAX_CONNECTIONS_PER_VAULT
}
fn default_cursor_timeout() -> Duration {
debug!("Using default cursor timeout: {DEFAULT_CURSOR_TIMEOUT:?}");
DEFAULT_CURSOR_TIMEOUT
}
impl Default for DatabaseConfig {
fn default() -> Self {
Self {
databases_directory_path: default_databases_directory_path(),
max_connections_per_vault: default_max_connections_per_vault(),
cursor_timeout: default_cursor_timeout(),
}
}
}

View file

@ -1,13 +1,11 @@
use log::debug;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use crate::consts::{
DEFAULT_HOST, DEFAULT_MAX_BODY_SIZE_MB, DEFAULT_MAX_CLIENTS_PER_VAULT,
DEFAULT_MERGEABLE_FILE_EXTENSIONS, DEFAULT_PORT, DEFAULT_RESPONSE_TIMEOUT_SECONDS,
DEFAULT_HOST, DEFAULT_MAX_BODY_SIZE_MB, DEFAULT_MAX_CLIENTS_PER_VAULT, DEFAULT_PORT,
};
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct ServerConfig {
#[serde(default = "default_host")]
pub host: String,
@ -20,12 +18,6 @@ pub struct ServerConfig {
#[serde(default = "default_max_clients_per_vault")]
pub max_clients_per_vault: usize,
#[serde(default = "default_response_timeout", with = "humantime_serde")]
pub response_timeout: Duration,
#[serde(default = "default_mergeable_file_extensions")]
pub mergeable_file_extensions: Vec<String>,
}
fn default_host() -> String {
@ -39,7 +31,7 @@ fn default_port() -> u16 {
}
fn default_max_body_size_mb() -> usize {
debug!("Using default max body size {DEFAULT_MAX_BODY_SIZE_MB} MB");
debug!("Using default max body size (MB): {DEFAULT_MAX_BODY_SIZE_MB}");
DEFAULT_MAX_BODY_SIZE_MB
}
@ -48,15 +40,13 @@ fn default_max_clients_per_vault() -> usize {
DEFAULT_MAX_CLIENTS_PER_VAULT
}
fn default_response_timeout() -> Duration {
debug!("Using default response timeout: {DEFAULT_RESPONSE_TIMEOUT_SECONDS:?}");
DEFAULT_RESPONSE_TIMEOUT_SECONDS
}
fn default_mergeable_file_extensions() -> Vec<String> {
debug!("Using default mergeable file extensions: {DEFAULT_MERGEABLE_FILE_EXTENSIONS:?}");
DEFAULT_MERGEABLE_FILE_EXTENSIONS
.iter()
.map(|s| (*s).to_owned())
.collect()
impl Default for ServerConfig {
fn default() -> Self {
Self {
host: default_host(),
port: default_port(),
max_body_size_mb: default_max_body_size_mb(),
max_clients_per_vault: default_max_clients_per_vault(),
}
}
}

View file

@ -0,0 +1,61 @@
use rand::{Rng as _, distributions::Alphanumeric, thread_rng};
use serde::{Deserialize, Serialize};
use crate::app_state::database::models::VaultId;
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct UserConfig {
#[serde(default = "default_users")]
pub user_tokens: Vec<User>,
}
impl UserConfig {
pub fn get_user(&self, token: &str) -> Option<&User> {
self.user_tokens.iter().find(|u| u.token == token)
}
}
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct User {
pub name: String,
pub token: String,
pub vault_access: VaultAccess,
}
impl Default for UserConfig {
fn default() -> Self {
Self {
user_tokens: default_users(),
}
}
}
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum VaultAccess {
#[default]
AllowAccessToAll,
AllowList(AllowListedVaults),
}
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct AllowListedVaults {
pub allowed: Vec<VaultId>,
}
fn default_users() -> Vec<User> {
vec![User {
name: "admin".to_owned(),
token: get_random_token(),
vault_access: VaultAccess::default(),
}]
}
pub fn get_random_token() -> String {
thread_rng()
.sample_iter(&Alphanumeric)
.take(64)
.map(char::from)
.collect()
}

View file

@ -0,0 +1,7 @@
pub const DEFAULT_CONFIG_PATH: &str = "config.yml";
pub const DEFAULT_DATABASES_DIRECTORY_PATH: &str = "databases";
pub const DEFAULT_HOST: &str = "127.0.0.1";
pub const DEFAULT_PORT: u16 = 3000;
pub const DEFAULT_MAX_CONNECTIONS_PER_VAULT: u32 = 12;
pub const DEFAULT_MAX_BODY_SIZE_MB: usize = 4096;
pub const DEFAULT_MAX_CLIENTS_PER_VAULT: usize = 256;

View file

@ -1,14 +1,15 @@
use std::fmt::Display;
use aide::OperationOutput;
use axum::{
Json,
http::StatusCode,
response::{IntoResponse, Response},
};
use log::{debug, error};
use log::{error, info};
use schemars::JsonSchema;
use serde::Serialize;
use thiserror::Error;
use ts_rs::TS;
#[derive(Error, Debug)]
pub enum SyncServerError {
@ -44,18 +45,14 @@ impl SyncServerError {
}
}
#[derive(TS, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
#[ts(export)]
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct SerializedError {
pub error_type: &'static str,
pub message: String,
pub causes: Vec<String>,
}
impl Display for SerializedError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}: {}", self.error_type, self.message)?;
if !self.causes.is_empty() {
write!(f, "\nCauses:\n")?;
for cause in &self.causes {
@ -93,49 +90,41 @@ impl From<&anyhow::Error> for SerializedError {
}
SerializedError {
error_type: error.downcast_ref::<SyncServerError>().map_or(
"UnknownError",
|e| match e {
SyncServerError::InitError(_) => "InitError",
SyncServerError::ClientError(_) => "ClientError",
SyncServerError::ServerError(_) => "ServerError",
SyncServerError::NotFound(_) => "NotFound",
SyncServerError::Unauthenticated(_) => "Unauthenticated",
SyncServerError::PermissionDeniedError(_) => "PermissionDeniedError",
},
),
message: error.to_string(),
causes,
}
}
}
pub fn init_error(error: anyhow::Error) -> SyncServerError {
debug!("Initialization error: {error:?}");
impl OperationOutput for SyncServerError {
type Inner = Self;
}
pub const fn init_error(error: anyhow::Error) -> SyncServerError {
SyncServerError::InitError(error)
}
pub fn server_error(error: anyhow::Error) -> SyncServerError {
debug!("Server error: {error:?}");
error!("Server error: {:?}", error);
SyncServerError::ServerError(error)
}
pub fn client_error(error: anyhow::Error) -> SyncServerError {
debug!("Client error: {error:?}");
info!("Client error: {:?}", error);
SyncServerError::ClientError(error)
}
pub fn not_found_error(error: anyhow::Error) -> SyncServerError {
debug!("Not found: {error:?}");
info!("Not found: {:?}", error);
SyncServerError::NotFound(error)
}
pub fn unauthenticated_error(error: anyhow::Error) -> SyncServerError {
debug!("Unauthenticated user: {error:?}");
info!("Unauthenticated user: {:?}", error);
SyncServerError::Unauthenticated(error)
}
pub fn permission_denied_error(error: anyhow::Error) -> SyncServerError {
debug!("Permission denied: {error:?}");
info!("Permission denied: {:?}", error);
SyncServerError::PermissionDeniedError(error)
}

View file

@ -0,0 +1,86 @@
mod app_state;
mod cli;
mod config;
mod consts;
mod errors;
mod server;
mod utils;
use std::process::ExitCode;
use anyhow::{Context as _, Result};
use clap::Parser;
use cli::args::Args;
use errors::{SyncServerError, init_error};
use log::info;
use server::create_server;
use tracing_subscriber::{EnvFilter, fmt::format, util::SubscriberInitExt};
#[tokio::main]
async fn main() -> ExitCode {
let args = Args::parse();
let mut result = set_up_logging(&args);
if result.is_ok() {
result = start_server(args).await;
}
match result {
Ok(()) => ExitCode::SUCCESS,
Err(e) => {
eprintln!("{}", e.serialize());
ExitCode::FAILURE
}
}
}
fn set_up_logging(args: &Args) -> Result<(), SyncServerError> {
let level_filter = match args.verbose.log_level_filter() {
// We don't want to allow disabling all logging
log::LevelFilter::Off | log::LevelFilter::Error => tracing::Level::ERROR,
log::LevelFilter::Warn => tracing::Level::WARN,
log::LevelFilter::Info => tracing::Level::INFO,
log::LevelFilter::Debug => tracing::Level::DEBUG,
log::LevelFilter::Trace => tracing::Level::TRACE,
};
let env_filter = EnvFilter::builder()
.with_default_directive(level_filter.into())
.from_env()
.context("Failed to create logging env filter")
.map_err(init_error)?;
let use_colors = args.color.use_colors();
let is_debug_mode = args.verbose.log_level_filter() >= log::LevelFilter::Debug;
tracing_subscriber::fmt()
.with_ansi(use_colors)
.with_env_filter(env_filter)
.event_format(
format()
.without_time()
.with_target(is_debug_mode)
.with_line_number(is_debug_mode)
.compact(),
)
.finish()
.try_init()
.context("Failed to initialise tracing")
.map_err(init_error)?;
Ok(())
}
async fn start_server(args: Args) -> Result<(), SyncServerError> {
info!(
"Starting VaultLink server version {}",
env!("CARGO_PKG_VERSION")
);
create_server(args.config_path)
.await
.context("Failed to start server")
.map_err(init_error)
}

View file

@ -1,36 +1,41 @@
pub mod auth;
mod auth;
mod create_document;
mod delete_document;
mod device_id_header;
mod fetch_document_version;
mod fetch_document_version_content;
mod fetch_latest_document_version;
mod fetch_latest_documents;
mod index;
mod ping;
mod requests;
mod responses;
mod update_document;
mod websocket;
use std::{ffi::OsString, sync::Arc};
use aide::{
axum::{
ApiRouter,
routing::{delete, get, post, put},
},
openapi::{Info, OpenApi},
scalar::Scalar,
transform::TransformOpenApi,
};
use anyhow::{Context as _, Result, anyhow};
use auth::auth_middleware;
use axum::{
Router,
Extension, Json,
extract::{DefaultBodyLimit, Request},
http::{self, HeaderValue, Method},
middleware,
response::IntoResponse,
routing::{IntoMakeService, delete, get, post, put},
routing::IntoMakeService,
};
use device_id_header::DEVICE_ID_HEADER_NAME;
use log::info;
use log::{error, info};
use tokio::signal;
use tower_http::{
LatencyUnit,
cors::CorsLayer,
limit::RequestBodyLimitLayer,
timeout::TimeoutLayer,
trace::{
DefaultOnBodyChunk, DefaultOnEos, DefaultOnFailure, DefaultOnRequest, DefaultOnResponse,
TraceLayer,
@ -40,42 +45,67 @@ use tracing::{Level, info_span};
use crate::{
app_state::AppState,
config::{Config, server_config::ServerConfig},
errors::{client_error, not_found_error},
config::server_config::ServerConfig,
errors::{SerializedError, not_found_error},
};
pub async fn create_server(config: Config) -> Result<()> {
let app_state = AppState::try_new(config)
pub async fn create_server(config_path: Option<OsString>) -> Result<()> {
aide::r#gen::on_error(|err| error!("{err}"));
aide::r#gen::extract_schemas(true);
let app_state = AppState::try_new(config_path)
.await
.context("Failed to initialise app state")?;
let server_config = app_state.config.server.clone();
let app = Router::new()
.nest("/", get_authed_routes(app_state.clone()))
.route("/", get(index::index))
.route("/vaults/:vault_id/ping", get(ping::ping))
.route("/vaults/:vault_id/ws", get(websocket::websocket_handler))
.layer(DefaultBodyLimit::disable())
.layer(RequestBodyLimitLayer::new(
app_state.config.server.max_body_size_mb * 1024 * 1024,
))
.layer(TimeoutLayer::new(server_config.response_timeout))
.layer(
CorsLayer::new()
.allow_origin("*".parse::<HeaderValue>().expect("Failed to parse origin"))
.allow_headers([
http::header::CONTENT_TYPE,
http::header::AUTHORIZATION,
DEVICE_ID_HEADER_NAME.clone(),
])
.allow_methods([Method::GET, Method::POST, Method::PUT, Method::DELETE]),
let mut api = create_open_api();
let app = ApiRouter::new()
.api_route("/vaults/:vault_id/ping", get(ping::ping))
.api_route(
"/vaults/:vault_id/documents",
get(fetch_latest_documents::fetch_latest_documents),
)
.route("/vaults/:vault_id/ws", get(websocket::websocket_handler))
.api_route(
"/vaults/:vault_id/documents",
post(create_document::create_document_multipart),
)
.api_route(
"/vaults/:vault_id/documents/json",
post(create_document::create_document_json),
)
.api_route(
"/vaults/:vault_id/documents/:document_id",
get(fetch_latest_document_version::fetch_latest_document_version),
)
.api_route(
"/vaults/:vault_id/documents/:document_id",
put(update_document::update_document_multipart),
)
.api_route(
"/vaults/:vault_id/documents/:document_id/json",
put(update_document::update_document_json),
)
.api_route(
"/vaults/:vault_id/documents/:document_id/versions/:version_id",
put(fetch_document_version::fetch_document_version),
)
.api_route(
"/vaults/:vault_id/documents/:document_id/versions/:version_id/content",
put(fetch_document_version_content::fetch_document_version_content),
)
.api_route(
"/vaults/:vault_id/documents/:document_id",
delete(delete_document::delete_document),
)
.route("/", Scalar::new("/api.json").axum_route())
.route("/api.json", axum::routing::get(serve_api))
.layer(
TraceLayer::new_for_http()
.make_span_with(|request: &Request<_>| {
info_span!(
"http",
"http_request",
method = ?request.method(),
uri = ?request.uri(),
)
@ -90,49 +120,49 @@ pub async fn create_server(config: Config) -> Result<()> {
.on_eos(DefaultOnEos::new())
.on_failure(DefaultOnFailure::new().level(Level::ERROR)),
)
.layer(DefaultBodyLimit::disable())
.layer(RequestBodyLimitLayer::new(
app_state.config.server.max_body_size_mb * 1024 * 1024,
))
.layer(
CorsLayer::new()
.allow_origin("*".parse::<HeaderValue>().expect("Failed to parse origin"))
.allow_headers([http::header::CONTENT_TYPE, http::header::AUTHORIZATION])
.allow_methods([Method::GET, Method::POST, Method::PUT, Method::DELETE]),
)
.with_state(app_state)
.fallback(handle_404)
.fallback(handle_405)
.finish_api_with(&mut api, add_api_docs_error_example)
.layer(Extension(Arc::new(api))) // https://github.com/tamasfe/aide/blob/507f4a8822bc0c13cbda0f589da1e0f4cbcdb812/examples/example-axum/src/main.rs#L39
.fallback(handler_404)
.into_make_service();
start_server(app, &server_config).await
}
fn get_authed_routes(app_state: AppState) -> Router<AppState> {
Router::new()
.route(
"/vaults/:vault_id/documents",
get(fetch_latest_documents::fetch_latest_documents),
)
.route(
"/vaults/:vault_id/documents",
post(create_document::create_document),
)
.route(
"/vaults/:vault_id/documents/:document_id",
get(fetch_latest_document_version::fetch_latest_document_version),
)
.route(
"/vaults/:vault_id/documents/:document_id/binary",
put(update_document::update_binary),
)
.route(
"/vaults/:vault_id/documents/:document_id/text",
put(update_document::update_text),
)
.route(
"/vaults/:vault_id/documents/:document_id/versions/:vault_update_id",
get(fetch_document_version::fetch_document_version),
)
.route(
"/vaults/:vault_id/documents/:document_id/versions/:vault_update_id/content",
get(fetch_document_version_content::fetch_document_version_content),
)
.route(
"/vaults/:vault_id/documents/:document_id",
delete(delete_document::delete_document),
)
.layer(middleware::from_fn_with_state(app_state, auth_middleware))
async fn serve_api(Extension(api): Extension<Arc<OpenApi>>) -> impl IntoResponse { Json(api) }
fn create_open_api() -> OpenApi {
OpenApi {
info: Info {
title: "VaultLink sync server".to_owned(),
summary: Some(
"Simple API for syncing documents between concurrent clients.".to_owned(),
),
description: Some(include_str!("../README.md").to_owned()),
version: env!("CARGO_PKG_VERSION").to_owned(),
..Info::default()
},
..OpenApi::default()
}
}
fn add_api_docs_error_example(api: TransformOpenApi<'_>) -> TransformOpenApi<'_> {
api.default_response_with::<Json<SerializedError>, _>(|res| {
res.example(SerializedError {
message: "An error has occurred".to_owned(),
causes: vec![],
})
})
}
async fn start_server(app: IntoMakeService<axum::Router>, config: &ServerConfig) -> Result<()> {
@ -179,10 +209,4 @@ async fn shutdown_signal() {
}
}
async fn handle_404() -> impl IntoResponse {
not_found_error(anyhow!("Page not found"))
}
async fn handle_405() -> impl IntoResponse {
client_error(anyhow!("Method not allowed"))
}
async fn handler_404() -> impl IntoResponse { not_found_error(anyhow!("Page not found")) }

View file

@ -0,0 +1,34 @@
use log::info;
use crate::{
app_state::{AppState, database::models::VaultId},
config::user_config::{AllowListedVaults, User, VaultAccess},
errors::{SyncServerError, permission_denied_error, unauthenticated_error},
};
// TODO: turn this into a middleware
pub fn auth(app_state: &AppState, token: &str, vault: &VaultId) -> Result<User, SyncServerError> {
let user = app_state
.config
.users
.get_user(token)
.cloned()
.ok_or_else(|| unauthenticated_error(anyhow::anyhow!("Invalid token")))?;
info!("User `{}` authenticated", user.name);
if match user.vault_access {
VaultAccess::AllowAccessToAll => true,
VaultAccess::AllowList(AllowListedVaults { ref allowed }) => allowed.contains(vault),
} {
info!(
"User `{}` is authorised to access to vault `{}`",
user.name, vault
);
Ok(user)
} else {
Err(permission_denied_error(anyhow::anyhow!(
"Permission denied for vault `{vault}`"
)))
}
}

View file

@ -0,0 +1,152 @@
use aide_axum_typed_multipart::TypedMultipart;
use anyhow::Context as _;
use axum::extract::{Path, State};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use axum_jsonschema::Json;
use schemars::JsonSchema;
use serde::Deserialize;
use sync_lib::base64_to_bytes;
use super::{
auth::auth,
requests::{CreateDocumentVersion, CreateDocumentVersionMultipart},
};
use crate::{
app_state::{
AppState,
database::models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId,
},
},
errors::{SyncServerError, client_error, server_error},
utils::sanitize_path,
};
// This is required for aide to infer the path parameter types and names
#[derive(Deserialize, JsonSchema)]
pub struct CreateDocumentPathParams {
vault_id: VaultId,
}
/// Create a new document in case a document with the same doesn't exist
/// already. If a document with the same path exists, a new version is created
/// with their content merged.
#[axum::debug_handler]
pub async fn create_document_multipart(
TypedHeader(auth_header): TypedHeader<Authorization<Bearer>>,
Path(CreateDocumentPathParams { vault_id }): Path<CreateDocumentPathParams>,
State(state): State<AppState>,
TypedMultipart(axum_typed_multipart::TypedMultipart(request)): TypedMultipart<
CreateDocumentVersionMultipart,
>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
internal_create_document(
auth_header,
state,
vault_id,
request.document_id,
request.relative_path,
request.content.contents.to_vec(),
)
.await
}
/// Create a new document in case a document with the same doesn't exist
/// already. If a document with the same path exists, a new version is created
/// with their content merged.
#[axum::debug_handler]
pub async fn create_document_json(
TypedHeader(auth_header): TypedHeader<Authorization<Bearer>>,
Path(CreateDocumentPathParams { vault_id }): Path<CreateDocumentPathParams>,
State(state): State<AppState>,
Json(request): Json<CreateDocumentVersion>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
let content_bytes = base64_to_bytes(&request.content_base64)
.context("Failed to decode base64 content in request")
.map_err(client_error)?;
internal_create_document(
auth_header,
state,
vault_id,
request.document_id,
request.relative_path,
content_bytes,
)
.await
}
async fn internal_create_document(
auth_header: Authorization<Bearer>,
state: AppState,
vault_id: VaultId,
document_id: Option<DocumentId>,
relative_path: String,
content: Vec<u8>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
auth(&state, auth_header.token(), &vault_id)?;
let mut transaction = state
.database
.create_write_transaction(&vault_id)
.await
.map_err(server_error)?;
let document_id = match document_id {
Some(document_id) => {
let existing_version = state
.database
.get_latest_document(&vault_id, &document_id, Some(&mut transaction))
.await
.map_err(server_error)?;
if existing_version.is_some() {
return Err(client_error(anyhow::anyhow!(
"Document with the same ID already exists"
)));
}
document_id
}
None => uuid::Uuid::new_v4(),
};
let last_update_id = state
.database
.get_max_update_id_in_vault(&vault_id, Some(&mut transaction))
.await
.map_err(server_error)?;
let sanitized_relative_path = sanitize_path(&relative_path);
let new_version = StoredDocumentVersion {
vault_update_id: last_update_id + 1,
document_id,
relative_path: sanitized_relative_path,
content,
updated_date: chrono::Utc::now(),
is_deleted: false,
};
state
.database
.insert_document_version(&vault_id, &new_version, Some(&mut transaction))
.await
.map_err(server_error)?;
transaction
.commit()
.await
.context("Failed to commit successful transaction")
.map_err(server_error)?;
state
.broadcasts
.send(vault_id, new_version.clone().into())
.await;
Ok(Json(new_version.into()))
}

View file

@ -0,0 +1,81 @@
use anyhow::Context as _;
use axum::extract::{Path, State};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use axum_jsonschema::Json;
use schemars::JsonSchema;
use serde::Deserialize;
use super::{auth::auth, requests::DeleteDocumentVersion};
use crate::{
app_state::{
AppState,
database::models::{
DocumentId, DocumentVersionWithoutContent, StoredDocumentVersion, VaultId,
},
},
errors::{SyncServerError, server_error},
utils::sanitize_path,
};
// This is required for aide to infer the path parameter types and names
#[derive(Deserialize, JsonSchema)]
pub struct DeleteDocumentPathParams {
vault_id: VaultId,
document_id: DocumentId,
}
#[axum::debug_handler]
pub async fn delete_document(
TypedHeader(auth_header): TypedHeader<Authorization<Bearer>>,
Path(DeleteDocumentPathParams {
vault_id,
document_id,
}): Path<DeleteDocumentPathParams>,
State(state): State<AppState>,
Json(request): Json<DeleteDocumentVersion>,
) -> Result<Json<DocumentVersionWithoutContent>, SyncServerError> {
auth(&state, auth_header.token(), &vault_id)?;
let mut transaction = state
.database
.create_write_transaction(&vault_id)
.await
.map_err(server_error)?;
let last_update_id = state
.database
.get_max_update_id_in_vault(&vault_id, Some(&mut transaction))
.await
.map_err(server_error)?;
let new_version = StoredDocumentVersion {
vault_update_id: last_update_id + 1,
document_id,
relative_path: sanitize_path(&request.relative_path),
content: vec![],
updated_date: chrono::Utc::now(),
is_deleted: true,
};
state
.database
.insert_document_version(&vault_id, &new_version, Some(&mut transaction))
.await
.map_err(server_error)?;
transaction
.commit()
.await
.context("Failed to commit successful transaction")
.map_err(server_error)?;
state
.broadcasts
.send(vault_id, new_version.clone().into())
.await;
Ok(Json(new_version.into()))
}

View file

@ -1,31 +1,33 @@
use anyhow::anyhow;
use axum::{
Json,
extract::{Path, State},
use axum::extract::{Path, State};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use log::debug;
use axum_jsonschema::Json;
use schemars::JsonSchema;
use serde::Deserialize;
use super::auth::auth;
use crate::{
app_state::{
AppState,
database::models::{DocumentId, DocumentVersion, VaultId, VaultUpdateId},
},
errors::{SyncServerError, not_found_error, server_error},
utils::normalize::normalize,
};
#[derive(Deserialize)]
// This is required for aide to infer the path parameter types and names
#[derive(Deserialize, JsonSchema)]
pub struct FetchDocumentVersionPathParams {
#[serde(deserialize_with = "normalize")]
vault_id: VaultId,
document_id: DocumentId,
vault_update_id: VaultUpdateId,
}
#[axum::debug_handler]
pub async fn fetch_document_version(
TypedHeader(auth_header): TypedHeader<Authorization<Bearer>>,
Path(FetchDocumentVersionPathParams {
vault_id,
document_id,
@ -33,9 +35,7 @@ pub async fn fetch_document_version(
}): Path<FetchDocumentVersionPathParams>,
State(state): State<AppState>,
) -> Result<Json<DocumentVersion>, SyncServerError> {
debug!(
"Fetching document version `{vault_update_id}` for document `{document_id}` in vault `{vault_id}`"
);
auth(&state, auth_header.token(), &vault_id)?;
let result = state
.database
@ -54,7 +54,7 @@ pub async fn fetch_document_version(
if result.document_id != document_id {
return Err(not_found_error(anyhow!(
"Document with document id `{document_id}` does not have a version with id \
`{vault_update_id}`",
`{vault_update_id}`",
)));
}

Some files were not shown because too many files have changed in this diff Show more