Compare commits
100 commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 17a96be0fc | |||
| 22723cbcae | |||
| 8e237bc232 | |||
| c1bc0b8955 | |||
| 8d14510b1c | |||
| 6d63d0ee8f | |||
| fc0d17837d | |||
| 1c94f771b2 | |||
| bd3c454941 | |||
| 656f3a91df | |||
| b611ac813e | |||
| 4f8abc9ce2 | |||
| 77e5fc07d3 | |||
| f661e1d6f9 | |||
| 4cc0444b5b | |||
| 7ad029924e | |||
| 32d338d496 | |||
| e08ef27d6a | |||
| 149ff8fd95 | |||
|
|
5d588b1bac | ||
|
|
7759275a53 | ||
|
|
386535497b | ||
| 6d280112fd | |||
| 7c18b6201f | |||
| 40b18721ad | |||
| 6aa7ebf29d | |||
| 25ee83174e | |||
| c58d81592d | |||
| e1d39d916a | |||
| 59284d00f9 | |||
| 0ab0e2e860 | |||
| 0aea22c211 | |||
| 5a698fe65d | |||
| 87fc848bfc | |||
| 0ce211177c | |||
| abe1feef09 | |||
| cc16505ef9 | |||
| c2144a2634 | |||
|
|
31993762de | ||
|
|
5a0e82b3e1 | ||
|
|
1db4cd02f9 | ||
| 92e0697b05 | |||
| 545be141d8 | |||
| 7b81034625 | |||
| 1a984427ab | |||
| 72dc942be6 | |||
| 79dfe992d1 | |||
| 9a82d6d8dd | |||
| bd2ef79fb1 | |||
| 23c000954a | |||
| a80da338e4 | |||
| 665cdb2881 | |||
| be6e861ee7 | |||
| f22c17347a | |||
| deffa195b3 | |||
| 408ce5268f | |||
| 6d1d5ca3bc | |||
| 3abc45cb86 | |||
| 776571bc5e | |||
| b012330a36 | |||
| 3d382ad741 | |||
| 5962feb90a | |||
| 88d48afce3 | |||
|
|
e03b9147df | ||
|
|
ff5421d023 | ||
|
|
9f597ab8ae | ||
|
|
065b689103 | ||
| b4774c8cfd | |||
| 2bb647cdac | |||
| b7b22a63cd | |||
| e85eb485e8 | |||
| 6191d1adb3 | |||
| 7d060f22ca | |||
| feed882635 | |||
| 999f93f55e | |||
| 840fa238a2 | |||
| 9e8c5ef524 | |||
| 24666f3435 | |||
| c38b8f19a1 | |||
| a1a339b23d | |||
| a6cf7ac38b | |||
| 3d49eb8859 | |||
| 1b46e5d237 | |||
| 8339474a21 | |||
| 0556e449e6 | |||
| c0b8d3854d | |||
| 450eaaff05 | |||
| 3da0673af6 | |||
| e052aa46c4 | |||
| de89532880 | |||
| 8a52034426 | |||
| 6f002459ae | |||
|
|
27f071d8db | ||
|
|
cbd240a703 | ||
|
|
d6b9467226 | ||
|
|
6b2aaa0328 | ||
|
|
da1c78e5bf | ||
| d87dd58b60 | |||
| 8911f88157 | |||
|
|
870f205130 |
95 changed files with 6822 additions and 2370 deletions
74
.forgejo/workflows/check.yml
Normal file
74
.forgejo/workflows/check.yml
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
name: Check
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['main']
|
||||
pull_request:
|
||||
branches: ['main']
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTFLAGS: '-Dwarnings'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: docker
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
check-latest: true
|
||||
|
||||
- name: Cache Rust dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/bin/
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
target/
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-
|
||||
|
||||
- name: Cache npm dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
reconcile-js/node_modules
|
||||
examples/website/node_modules
|
||||
~/.npm
|
||||
key: >-
|
||||
${{ runner.os }}-npm-${{
|
||||
hashFiles(
|
||||
'reconcile-js/package-lock.json',
|
||||
'examples/website/package-lock.json'
|
||||
)
|
||||
}}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-npm-
|
||||
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
||||
| sh -s -- -y --default-toolchain none --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Install uv
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -LsSf https://astral.sh/uv/install.sh | sh
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Lint
|
||||
run: scripts/lint.sh
|
||||
|
||||
- name: Test
|
||||
run: scripts/test.sh
|
||||
|
||||
- name: Build website
|
||||
run: scripts/build-website.sh
|
||||
|
|
@ -1,25 +1,28 @@
|
|||
name: Check & publish
|
||||
name: Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['main']
|
||||
tags: ['*']
|
||||
pull_request:
|
||||
branches: ['main']
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RUSTFLAGS: '-Dwarnings'
|
||||
|
||||
concurrency:
|
||||
group: 'pages'
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js environment
|
||||
uses: actions/setup-node@v4.2.0
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
check-latest: true
|
||||
|
|
@ -37,27 +40,52 @@ jobs:
|
|||
restore-keys: |
|
||||
${{ runner.os }}-cargo-
|
||||
|
||||
- name: Setup rust
|
||||
run: |
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
which cargo-machete || cargo install cargo-machete
|
||||
- name: Cache npm dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
reconcile-js/node_modules
|
||||
examples/website/node_modules
|
||||
~/.npm
|
||||
key: >-
|
||||
${{ runner.os }}-npm-${{
|
||||
hashFiles(
|
||||
'reconcile-js/package-lock.json',
|
||||
'examples/website/package-lock.json'
|
||||
)
|
||||
}}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-npm-
|
||||
|
||||
- name: Build wasm
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
wasm-pack build --target web --features wasm
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
||||
| sh -s -- -y --default-toolchain none --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Install uv
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -LsSf https://astral.sh/uv/install.sh | sh
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Lint
|
||||
run: |
|
||||
cargo clippy --all-targets --all-features
|
||||
cargo fmt --all -- --check
|
||||
cargo machete
|
||||
run: scripts/lint.sh
|
||||
|
||||
- name: Test
|
||||
run: scripts/test.sh
|
||||
|
||||
- name: Build website
|
||||
run: scripts/build-website.sh
|
||||
|
||||
- name: Deploy to pages mount
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: |
|
||||
apt-get update && apt-get install -y rsync
|
||||
rsync -a --delete examples/website/dist/ /pages/reconcile
|
||||
|
||||
publish-crate:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
|
||||
steps:
|
||||
|
|
@ -76,19 +104,25 @@ jobs:
|
|||
restore-keys: |
|
||||
${{ runner.os }}-cargo-
|
||||
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
||||
| sh -s -- -y --default-toolchain none --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Publish to crates.io
|
||||
run: cargo publish --token ${{ secrets.CRATES_IO_TOKEN }}
|
||||
|
||||
publish-npm:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js environment
|
||||
uses: actions/setup-node@v4.2.0
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
check-latest: true
|
||||
|
|
@ -113,23 +147,21 @@ jobs:
|
|||
path: |
|
||||
reconcile-js/node_modules
|
||||
~/.npm
|
||||
key: ${{ runner.os }}-npm-${{ hashFiles('reconcile-js/package-lock.json') }}
|
||||
key: >-
|
||||
${{ runner.os }}-npm-${{
|
||||
hashFiles('reconcile-js/package-lock.json')
|
||||
}}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-npm-
|
||||
|
||||
- name: Setup rust
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
||||
| sh -s -- -y --default-toolchain none --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Build wasm
|
||||
run: |
|
||||
wasm-pack build --target web --features wasm
|
||||
|
||||
- name: Build reconcile-js
|
||||
run: |
|
||||
cd reconcile-js
|
||||
npm ci
|
||||
npm run build
|
||||
- name: Build website
|
||||
run: scripts/build-website.sh
|
||||
|
||||
- name: Publish reconcile-js to NPM
|
||||
run: |
|
||||
21
.github/dependabot.yml
vendored
21
.github/dependabot.yml
vendored
|
|
@ -1,21 +0,0 @@
|
|||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: 'cargo'
|
||||
directories: ['**']
|
||||
schedule:
|
||||
interval: 'daily'
|
||||
|
||||
- package-ecosystem: 'github-actions'
|
||||
directories: ['**']
|
||||
schedule:
|
||||
interval: 'daily'
|
||||
|
||||
- package-ecosystem: 'npm'
|
||||
directories: ['/reconcile-js', '/examples/website']
|
||||
schedule:
|
||||
interval: 'daily'
|
||||
72
.github/workflows/gh-pages.yml
vendored
72
.github/workflows/gh-pages.yml
vendored
|
|
@ -1,72 +0,0 @@
|
|||
name: Deploy Website to GitHub Pages
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
|
||||
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
|
||||
concurrency:
|
||||
group: 'pages'
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cache Rust dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/bin/
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
target/
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-
|
||||
|
||||
- name: Cache npm dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
reconcile-js/node_modules
|
||||
~/.npm
|
||||
key: ${{ runner.os }}-npm-${{ hashFiles('reconcile-js/package-lock.json') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-npm-
|
||||
|
||||
- name: Build wasm
|
||||
run: |
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
scripts/build-website.sh
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
path: examples/website/dist
|
||||
|
||||
deploy:
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Deploy to GitHub Pages
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v4
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -9,3 +9,6 @@ node_modules
|
|||
|
||||
# WebPack build output
|
||||
dist
|
||||
|
||||
# Python virtual environment
|
||||
.venv
|
||||
|
|
|
|||
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -8,5 +8,8 @@
|
|||
},
|
||||
"rust-analyzer.cargo.features": [
|
||||
"all"
|
||||
],
|
||||
"python.analysis.extraPaths": [
|
||||
"./reconcile-python/python"
|
||||
]
|
||||
}
|
||||
769
Cargo.lock
generated
769
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
33
Cargo.toml
33
Cargo.toml
|
|
@ -1,8 +1,8 @@
|
|||
[package]
|
||||
name = "reconcile-text"
|
||||
description = "Intelligent 3-way text merging with automated conflict resolution"
|
||||
version = "0.5.0"
|
||||
rust-version = "1.85"
|
||||
version = "0.11.0"
|
||||
rust-version = "1.94"
|
||||
authors = ["Andras Schmelczer <andras@schmelczer.dev>"]
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
|
|
@ -11,7 +11,7 @@ repository = "https://github.com/schmelczer/reconcile"
|
|||
homepage = "https://schmelczer.dev/reconcile"
|
||||
keywords = ["merge", "OT", "CRDT", "3-way", "diff"]
|
||||
categories = ["wasm", "text-processing", "text-editors", "algorithms", "data-structures"]
|
||||
exclude = ["reconcile-js", ".*", "examples/website"]
|
||||
exclude = ["reconcile-js", "reconcile-python", ".*", "examples/website"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
|
@ -20,10 +20,15 @@ crate-type = ["cdylib", "rlib"]
|
|||
name = "merge-file"
|
||||
path = "examples/merge-file.rs"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0.219", optional = true, features = ["derive"] }
|
||||
[[example]]
|
||||
name = "compare-with-diff-match-patch"
|
||||
path = "examples/compare-with-diff-match-patch.rs"
|
||||
|
||||
wasm-bindgen = { version = "0.2.99", optional = true }
|
||||
[dependencies]
|
||||
serde = { version = "1.0.228", optional = true, features = ["derive"] }
|
||||
thiserror = "2.0.18"
|
||||
|
||||
wasm-bindgen = { version = "0.2.114", optional = true }
|
||||
|
||||
# The `console_error_panic_hook` crate provides better debugging of panics by
|
||||
# logging them with `console.error`. This is great for development, but requires
|
||||
|
|
@ -31,21 +36,21 @@ wasm-bindgen = { version = "0.2.99", optional = true }
|
|||
# code size when deploying.
|
||||
console_error_panic_hook = { version = "0.1.7", optional = true }
|
||||
|
||||
wee_alloc = { version = "0.4.2", optional = true }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
serde = [ "dep:serde" ]
|
||||
wasm = [ "dep:wasm-bindgen", "dep:wee_alloc" ]
|
||||
wasm = [ "dep:wasm-bindgen", "console_error_panic_hook" ]
|
||||
console_error_panic_hook = [ "dep:console_error_panic_hook" ]
|
||||
all = [ "wasm", "serde" ]
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.42.2"
|
||||
insta = "1.46.3"
|
||||
pretty_assertions = "1.4.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_yaml = "0.9.34"
|
||||
test-case = "3.3.1"
|
||||
wasm-bindgen-test = "0.3.49"
|
||||
wasm-bindgen-test = "0.3.64"
|
||||
diff-match-patch-rs = "0.5.1"
|
||||
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
|
|
@ -54,7 +59,7 @@ opt-level = 3
|
|||
strip = "symbols"
|
||||
|
||||
[package.metadata.wasm-pack.profile.release]
|
||||
wasm-opt = ['-O4', '--enable-bulk-memory']
|
||||
wasm-opt = ['-O4', '--enable-bulk-memory', '--enable-nontrapping-float-to-int']
|
||||
|
||||
[lints.rust]
|
||||
unsafe_code = "forbid"
|
||||
|
|
@ -64,7 +69,7 @@ missing_debug_implementations = "warn"
|
|||
[lints.clippy]
|
||||
await_holding_lock = "warn"
|
||||
dbg_macro = "warn"
|
||||
empty_enum = "warn"
|
||||
empty_enums = "warn"
|
||||
enum_glob_use = "warn"
|
||||
exit = "warn"
|
||||
filter_map_next = "warn"
|
||||
|
|
|
|||
151
README.md
151
README.md
|
|
@ -1,29 +1,31 @@
|
|||
# `reconcile-text`: conflict-free 3-way text merging
|
||||
|
||||
A Rust and TypeScript library for merging conflicting text edits without manual intervention. Unlike traditional 3-way merge tools that produce conflict markers, `reconcile-text` automatically resolves conflicts by applying both sets of changes (while updating cursor positions) using an algorithm inspired by Operational Transformation.
|
||||
A Rust, TypeScript, and Python library for merging conflicting text edits without manual intervention. Unlike traditional 3-way merge tools that produce conflict markers, `reconcile-text` automatically resolves conflicts by applying both sets of changes (while updating cursor positions) using an algorithm inspired by Operational Transformation.
|
||||
|
||||
## Try it
|
||||
|
||||
✨ **[Try the interactive demo](https://schmelczer.dev/reconcile)** to see it in action!
|
||||
✨ **[Try the interactive demo][8]** to see it in action!
|
||||
|
||||
### Install it in your project
|
||||
|
||||
- `cargo add reconcile-text` ([reconcile-text on crates.io](https://crates.io/crates/reconcile-text))
|
||||
- `npm install reconcile-text` ([reconcile-text on NPM](https://www.npmjs.com/package/reconcile-text))
|
||||
- `cargo add reconcile-text` ([reconcile-text on crates.io][9])
|
||||
- `npm install reconcile-text` ([reconcile-text on NPM][10])
|
||||
- `uv add reconcile-text` or `pip install reconcile-text` ([reconcile-text on PyPI][27])
|
||||
|
||||
## Key features
|
||||
|
||||
- **No conflict markers** — Clean, merged output without Git's `<<<<<<<` markers
|
||||
- **Cursor tracking** — Automatically repositions cursors and selections throughout the merging process
|
||||
- **Flexible tokenisation** — Word-level (default), character-level, line-level, or custom tokenisation strategies
|
||||
- **Unicode support** — Full UTF-8 support with proper handling of complex scripts and grapheme clusters
|
||||
- **Cross-platform** — Native Rust performance with WebAssembly bindings for JavaScript environments
|
||||
- **No conflict markers** - Clean, merged output without Git's `<<<<<<<` markers
|
||||
- **Cursor tracking** - Automatically repositions cursors and selections throughout the merging process
|
||||
- **Flexible tokenisation** - Word-level (default), character-level, line-level, or custom tokenisation strategies
|
||||
- **Unicode support** - Full UTF-8 support with proper handling of complex scripts and grapheme clusters
|
||||
- **Cross-platform** - Native Rust performance with WebAssembly bindings for JavaScript and native bindings for Python
|
||||
|
||||
## Quick start
|
||||
|
||||
### Rust
|
||||
|
||||
Install via crates.io:
|
||||
|
||||
```sh
|
||||
cargo add reconcile-text
|
||||
```
|
||||
|
|
@ -32,7 +34,7 @@ Alternatively, add `reconcile-text` to your `Cargo.toml`:
|
|||
|
||||
```toml
|
||||
[dependencies]
|
||||
reconcile-text = "0.5"
|
||||
reconcile-text = "0.11"
|
||||
```
|
||||
|
||||
Then start merging:
|
||||
|
|
@ -51,7 +53,7 @@ let result = reconcile(parent, &left.into(), &right.into(), &*BuiltinTokenizer::
|
|||
assert_eq!(result.apply().text(), "Hi beautiful world");
|
||||
```
|
||||
|
||||
See the [merge-file example](examples/merge-file.rs) for another example or the [library's documentation](https://docs.rs/reconcile-text/latest/reconcile_text).
|
||||
See the [merge-file example](examples/merge-file.rs) for another example, or the [library's documentation][11].
|
||||
|
||||
### JavaScript/TypeScript
|
||||
|
||||
|
|
@ -76,7 +78,33 @@ const result = reconcile(parent, left, right);
|
|||
console.log(result.text); // "Hi beautiful world"
|
||||
```
|
||||
|
||||
See the [example website source](examples/website/src/index.ts) for a more complex example or the [advanced examples document](https://github.com/schmelczer/reconcile/blob/main/docs/advanced-ts.md).
|
||||
See the [example website source](examples/website/src/index.ts) for a more complex example, or the [advanced examples document](docs/advanced-ts.md).
|
||||
|
||||
### Python
|
||||
|
||||
Install via uv or pip:
|
||||
|
||||
```sh
|
||||
uv add reconcile-text
|
||||
# or: pip install reconcile-text
|
||||
```
|
||||
|
||||
Then use it in your application:
|
||||
|
||||
```python
|
||||
from reconcile_text import reconcile
|
||||
|
||||
# Start with the original text
|
||||
parent = "Hello world"
|
||||
# Two users edit simultaneously
|
||||
left = "Hello beautiful world"
|
||||
right = "Hi world"
|
||||
|
||||
result = reconcile(parent, left, right)
|
||||
print(result["text"]) # "Hi beautiful world"
|
||||
```
|
||||
|
||||
See the [merge-file example](examples/merge_file.py) for a file-merging CLI, or the [advanced examples document](docs/advanced-python.md) for cursor tracking, change provenance, and compact diffs.
|
||||
|
||||
## Motivation
|
||||
|
||||
|
|
@ -86,30 +114,81 @@ This creates **Differential Synchronisation** scenarios ([2], [3]): we only know
|
|||
|
||||
> **Note**: Some text domains require more careful handling. In legal contracts, for instance, conflicting edits could combine into a double negation and unintentionally change the meaning. Similarly, semantic conflicts can still arise when merging code, even in the absence of syntactic conflicts.
|
||||
|
||||
Differential sync is implemented by [universal-sync](https://github.com/invisible-college/universal-sync) and my Obsidian plugin [vault-link](https://github.com/schmelczer/vault-link), and it requires a merging tool which creates conflict-free results for the best user experience.
|
||||
Differential sync is implemented by [universal-sync][12], and it requires a merging tool that creates conflict-free results for the best user experience.
|
||||
|
||||
## How it works
|
||||
|
||||
`reconcile-text` starts off similarly to `diff3` ([4], [5]) but adds automated conflict resolution. Given a **parent** document and two modified versions (`left` and `right`), the following happens:
|
||||
|
||||
1. **Tokenisation** — Input texts get split into meaningful units (words, characters, etc.) for granular merging
|
||||
2. **Diff computation** — Myers' algorithm calculates differences between (parent ↔ left) and (parent ↔ right)
|
||||
3. **Diff optimisation** — Operations are reordered and consolidated to maximise chained changes
|
||||
4. **Operational Transformation** — Edits are woven together using OT principles, preserving all modifications and updating cursors
|
||||
1. **Tokenisation** - Input texts are split into meaningful units (words, characters, etc.) for granular merging
|
||||
2. **Diff computation** - Myers' algorithm calculates differences between (parent ↔ left) and (parent ↔ right)
|
||||
3. **Diff optimisation** - Operations are reordered and consolidated to maximise chained changes
|
||||
4. **Operational Transformation** - Edits are woven together using OT principles, preserving all modifications and updating cursors
|
||||
|
||||
Whilst the primary goal of `reconcile-text` isn't to implement OT, it provides an elegant way to merge Myers' diff outputs. (For a dedicated Rust OT implementation, see [operational-transform-rs](https://github.com/spebern/operational-transform-rs).) The same could be achieved with CRDTs, which many libraries implement well for text—see [Loro](https://github.com/loro-dev/loro/), [cola](https://github.com/nomad/cola), and [automerge](https://github.com/automerge/automerge) as excellent examples.
|
||||
Whilst the primary goal of `reconcile-text` isn't to implement OT, it provides an elegant way to merge Myers' diff outputs. (For a dedicated Rust OT implementation, see [operational-transform-rs][13].) The same could be achieved with CRDTs, which many libraries implement well for text (see [Loro][14], [cola][15], and [automerge][16]).
|
||||
|
||||
However, when only the end result of concurrent changes is observable, merge quality depends entirely on the quality of the underlying 2-way diffs. For instance, `move` operations cannot be supported because Myers' algorithm decomposes them into separate `insert` and `delete` operations, regardless of the merging algorithm used.
|
||||
|
||||
## Comparison with other approaches
|
||||
|
||||
### Traditional 3-way merge (diff3, Git)
|
||||
|
||||
Tools like `diff3` ([4]) and Git produce **conflict markers** (`<<<<<<<` / `=======` / `>>>>>>>`) when both sides modify the same region. This works for source code where a human must verify correctness, but breaks the reading flow for prose. `reconcile-text` uses the same diff3-like foundation but adds an OT-inspired resolution step that eliminates conflict markers entirely. Libraries like [diffy][17], [merge3][18] (Rust), and [node-diff3][19] (JavaScript) all fall into this category.
|
||||
|
||||
### diff-match-patch
|
||||
|
||||
[diff-match-patch][6] is a widely used library created by Neil Fraser at Google in 2006, providing character-level diffing (Myers' algorithm), fuzzy string matching (Bitap algorithm), and patch application. It powers Fraser's **Differential Synchronisation** protocol ([2]): compute a diff between two texts, apply the patch to a third text that may have drifted, and repeat until convergence. If a patch fails, the failure self-corrects in the next sync cycle.
|
||||
|
||||
The key differences from `reconcile-text`:
|
||||
|
||||
- **2-way vs 3-way** - diff-match-patch diffs two texts and applies the result as a patch. It has no concept of a common ancestor and cannot reason about "left changes" vs "right changes". `reconcile-text` performs true 3-way merging, understanding the intent behind each side's edits.
|
||||
|
||||
- **Character-level only** - Word-level and line-level diffs require encoding tokens as single Unicode characters before diffing ([7]). `reconcile-text` supports word, character, line, and custom tokenisation natively.
|
||||
|
||||
- **Patches can fail** - `patch_apply` returns a boolean array indicating success per patch; failed patches are silently dropped. In Differential Synchronisation, failures self-correct in the next cycle, but in one-shot merges, edits can be lost. `reconcile-text` always produces a complete merged result.
|
||||
|
||||
- **No cursor tracking or change provenance** - diff-match-patch does not reposition cursors or track which side made which edit. `reconcile-text` does both automatically.
|
||||
|
||||
See the [comparison example](examples/compare-with-diff-match-patch.rs) for concrete cases where diff-match-patch garbles adjacent edits and silently drops an entire sentence, while `reconcile-text` merges both users' changes correctly.
|
||||
|
||||
> **When to use diff-match-patch instead**: when you don't have a common ancestor, for example, when synchronising texts that have diverged through an unknown sequence of edits. If you have a common ancestor (as in most version control and collaborative editing scenarios), `reconcile-text` produces more reliable results.
|
||||
|
||||
### CRDTs (Yjs, Automerge, Loro, diamond-types)
|
||||
|
||||
Conflict-free Replicated Data Types guarantee convergence by mathematical construction: every operation commutes, so the order of application doesn't matter. Libraries like [Yjs][20] (and its Rust port [Yrs][21]), [Automerge][16], [Loro][14], [cola][15], and [diamond-types][22] implement this approach.
|
||||
|
||||
CRDTs capture every individual keystroke or operation, assigning each a unique identity. This makes them ideal when you control the complete editing infrastructure: the editor, the transport layer, and the storage format. They work peer-to-peer, handle arbitrary numbers of concurrent editors, and never lose an edit.
|
||||
|
||||
The trade-off is that CRDTs require **maintaining document state over time** - an operation log or internal data structure that grows with the document's edit history. You cannot simply hand a CRDT library three plain strings and get a merged result. This makes them unsuitable for Differential Synchronisation scenarios where you only observe the final state of each document, which is exactly the niche `reconcile-text` fills.
|
||||
|
||||
> **When to use CRDTs instead**: if you control the complete editing stack and can capture every operation as it happens, CRDTs provide stronger convergence guarantees. They also support more than two concurrent editors naturally, whereas `reconcile-text` merges exactly two forks at a time (though merges can be chained).
|
||||
|
||||
### Operational Transformation (OT)
|
||||
|
||||
OT libraries like [ot.js][23] and [ShareJS][24] transform concurrent operations against each other so that applying them in any order produces the same result. Like CRDTs, they capture individual operations and require infrastructure to coordinate them, typically a central server that determines the canonical operation order.
|
||||
|
||||
`reconcile-text` borrows the *concept* of OT (transforming one side's edits against the other) but applies it to a different problem. Instead of transforming individual keystrokes in real time, it transforms the consolidated diff output of two complete edits. This means it doesn't need a server, doesn't need to capture operations as they happen, and works entirely offline.
|
||||
|
||||
> **When to use OT instead**: if you need real-time collaboration with sub-second latency and can run a coordination server, dedicated OT libraries handle this well. `reconcile-text` is designed for merge points, not live keystroke-by-keystroke synchronisation.
|
||||
|
||||
## Development
|
||||
|
||||
Contributions are welcome!
|
||||
|
||||
### Environment
|
||||
|
||||
#### Python setup
|
||||
|
||||
Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and build the extension for development:
|
||||
|
||||
```sh
|
||||
cd reconcile-python
|
||||
uv run maturin develop
|
||||
```
|
||||
|
||||
#### Node.js setup
|
||||
|
||||
1. Install [nvm](https://github.com/nvm-sh/nvm):
|
||||
1. Install [nvm][25]:
|
||||
```sh
|
||||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash
|
||||
```
|
||||
|
|
@ -117,21 +196,17 @@ Contributions are welcome!
|
|||
```sh
|
||||
nvm install 22 && nvm use 22
|
||||
```
|
||||
3. Optionally, set as default:
|
||||
3. Optionally, set as default:
|
||||
```sh
|
||||
nvm alias default 22
|
||||
```
|
||||
|
||||
#### Rust toolchain
|
||||
|
||||
1. Install [rustup](https://rustup.rs):
|
||||
Install [rustup][26]:
|
||||
```bash
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
```
|
||||
2. Install additional tools:
|
||||
```bash
|
||||
cargo install wasm-pack cargo-insta cargo-edit
|
||||
```
|
||||
|
||||
### Scripts
|
||||
|
||||
|
|
@ -145,8 +220,30 @@ Contributions are welcome!
|
|||
|
||||
[MIT](./LICENSE)
|
||||
|
||||
[1]:https://marijnhaverbeke.nl/blog/collaborative-editing-cm.html
|
||||
[2]: https://neil.fraser.name/writing/sync/
|
||||
[1]: https://marijnhaverbeke.nl/blog/collaborative-editing-cm.html
|
||||
[2]: https://neil.fraser.name/writing/sync/
|
||||
[3]: https://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf
|
||||
[4]: https://blog.jcoglan.com/2017/05/08/merging-with-diff3/
|
||||
[5]: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/35605.pdf
|
||||
[6]: https://github.com/google/diff-match-patch
|
||||
[7]: https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
|
||||
[8]: https://schmelczer.dev/reconcile
|
||||
[9]: https://crates.io/crates/reconcile-text
|
||||
[10]: https://www.npmjs.com/package/reconcile-text
|
||||
[11]: https://docs.rs/reconcile-text/latest/reconcile_text
|
||||
[12]: https://github.com/invisible-college/universal-sync
|
||||
[13]: https://github.com/spebern/operational-transform-rs
|
||||
[14]: https://github.com/loro-dev/loro/
|
||||
[15]: https://github.com/nomad/cola
|
||||
[16]: https://github.com/automerge/automerge
|
||||
[17]: https://crates.io/crates/diffy
|
||||
[18]: https://github.com/breezy-team/merge3-rs
|
||||
[19]: https://github.com/bhousel/node-diff3
|
||||
[20]: https://github.com/yjs/yjs
|
||||
[21]: https://github.com/y-crdt/y-crdt
|
||||
[22]: https://github.com/josephg/diamond-types
|
||||
[23]: https://ot.js.org/
|
||||
[24]: https://github.com/josephg/ShareJS
|
||||
[25]: https://github.com/nvm-sh/nvm
|
||||
[26]: https://rustup.rs
|
||||
[27]: https://pypi.org/project/reconcile-text/
|
||||
|
|
|
|||
92
docs/advanced-python.md
Normal file
92
docs/advanced-python.md
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Advanced Usage (Python)
|
||||
|
||||
## Edit Provenance
|
||||
|
||||
Track which changes came from where using `reconcile_with_history`:
|
||||
|
||||
```python
|
||||
from reconcile_text import reconcile_with_history
|
||||
|
||||
result = reconcile_with_history(
|
||||
"Hello world",
|
||||
"Hello beautiful world",
|
||||
"Hi world",
|
||||
)
|
||||
|
||||
print(result["text"]) # "Hi beautiful world"
|
||||
print(result["history"]) #
|
||||
# [
|
||||
# {"text": "Hello", "history": "RemovedFromRight"},
|
||||
# {"text": "Hi", "history": "AddedFromRight"},
|
||||
# {"text": " beautiful", "history": "AddedFromLeft"},
|
||||
# {"text": " ", "history": "Unchanged"},
|
||||
# {"text": "world", "history": "Unchanged"},
|
||||
# ]
|
||||
```
|
||||
|
||||
## Tokenization Strategies
|
||||
|
||||
`reconcile-text` offers different approaches to split text for merging:
|
||||
|
||||
- **Word tokenizer** (`"Word"`) - Splits on word boundaries (recommended for prose)
|
||||
- **Character tokenizer** (`"Character"`) - Individual characters (fine-grained control)
|
||||
- **Line tokenizer** (`"Line"`) - Line-by-line (similar to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file))
|
||||
- **Markdown tokenizer** (`"Markdown"`) - Splits on Markdown structural boundaries (headings, list items, paragraphs)
|
||||
|
||||
```python
|
||||
from reconcile_text import reconcile
|
||||
|
||||
result = reconcile("abc", "axc", "abyc", "Character")
|
||||
print(result["text"]) # "axyc"
|
||||
```
|
||||
|
||||
## Cursor Tracking
|
||||
|
||||
`reconcile-text` automatically tracks cursor positions through merges, which is useful for collaborative editors. Selections can be tracked by providing them as a pair of cursors.
|
||||
|
||||
```python
|
||||
from reconcile_text import reconcile
|
||||
|
||||
result = reconcile(
|
||||
"Hello world",
|
||||
{
|
||||
"text": "Hello beautiful world",
|
||||
"cursors": [{"id": 1, "position": 6}], # After "Hello "
|
||||
},
|
||||
{
|
||||
"text": "Hi world",
|
||||
"cursors": [{"id": 2, "position": 0}], # At the beginning
|
||||
},
|
||||
)
|
||||
|
||||
# Result: "Hi beautiful world" with repositioned cursors
|
||||
print(result["text"]) # "Hi beautiful world"
|
||||
print(result["cursors"]) # [{"id": 2, "position": 0}, {"id": 1, "position": 3}]
|
||||
```
|
||||
|
||||
> The `cursors` list is sorted by character position (not IDs).
|
||||
|
||||
## Compact Diffs
|
||||
|
||||
Generate and apply compact diff representations:
|
||||
|
||||
```python
|
||||
from reconcile_text import diff, undiff
|
||||
|
||||
original = "Hello world"
|
||||
changed = "Hello beautiful world"
|
||||
|
||||
# Generate a compact diff
|
||||
d = diff(original, changed)
|
||||
print(d) # [5, ' beautiful world']
|
||||
|
||||
# Reconstruct the changed text from the diff
|
||||
reconstructed = undiff(original, d)
|
||||
assert reconstructed == changed
|
||||
```
|
||||
|
||||
Diff entries are positive integers (retain N characters), negative integers (delete N characters), and strings (insert text).
|
||||
|
||||
## File Merging Example
|
||||
|
||||
For a complete file-merging CLI (a trivial version of `git merge-file`), see [`examples/merge_file.py`](../examples/merge_file.py).
|
||||
|
|
@ -2,69 +2,230 @@
|
|||
|
||||
## Edit Provenance
|
||||
|
||||
Track which changes came from where using `reconcileWithHistory`:
|
||||
Track which changes came from where using `reconcileWithHistory`. The result's
|
||||
`history` field is typed as `SpanWithHistory[]`, and each span's `history` is a
|
||||
`History` string-literal union.
|
||||
|
||||
```javascript
|
||||
const result = reconcileWithHistory(
|
||||
'Hello world',
|
||||
'Hello beautiful world',
|
||||
'Hi world'
|
||||
);
|
||||
```typescript
|
||||
import { reconcileWithHistory, type History, type SpanWithHistory } from 'reconcile-text';
|
||||
|
||||
console.log(result.text); // "Hi beautiful world"
|
||||
console.log(result.history); /*
|
||||
[
|
||||
{
|
||||
"text": "Hello",
|
||||
"history": "RemovedFromRight"
|
||||
},
|
||||
{
|
||||
"text": "Hi",
|
||||
"history": "AddedFromRight"
|
||||
},
|
||||
{
|
||||
"text": " beautiful",
|
||||
"history": "AddedFromLeft"
|
||||
},
|
||||
{
|
||||
"text": " ",
|
||||
"history": "Unchanged"
|
||||
},
|
||||
{
|
||||
"text": "world",
|
||||
"history": "Unchanged"
|
||||
const result = reconcileWithHistory('Hello world', 'Hello beautiful world', 'Hi world');
|
||||
|
||||
console.log(result.text); // "Hi beautiful world"
|
||||
|
||||
const history: SpanWithHistory[] = result.history;
|
||||
console.log(history);
|
||||
// [
|
||||
// { text: "Hello", history: "RemovedFromRight" },
|
||||
// { text: "Hi", history: "AddedFromRight" },
|
||||
// { text: " beautiful", history: "AddedFromLeft" },
|
||||
// { text: " ", history: "Unchanged" },
|
||||
// { text: "world", history: "Unchanged" },
|
||||
// ]
|
||||
|
||||
const classByHistory = {
|
||||
Unchanged: 'merge-unchanged',
|
||||
AddedFromLeft: 'merge-added-left',
|
||||
AddedFromRight: 'merge-added-right',
|
||||
RemovedFromLeft: 'merge-removed-left',
|
||||
RemovedFromRight: 'merge-removed-right',
|
||||
} satisfies Record<History, string>;
|
||||
```
|
||||
|
||||
Using `satisfies Record<History, string>` keeps the object literal's values
|
||||
narrow while forcing every history case to be handled. If a future version adds
|
||||
another `History` value, TypeScript will point at this mapping.
|
||||
|
||||
For control flow, use the same union as an exhaustiveness check:
|
||||
|
||||
```typescript
|
||||
import type { History } from 'reconcile-text';
|
||||
|
||||
function historyLabel(history: History): string {
|
||||
switch (history) {
|
||||
case 'Unchanged':
|
||||
return 'unchanged';
|
||||
case 'AddedFromLeft':
|
||||
return 'added by left';
|
||||
case 'AddedFromRight':
|
||||
return 'added by right';
|
||||
case 'RemovedFromLeft':
|
||||
return 'removed from left';
|
||||
case 'RemovedFromRight':
|
||||
return 'removed from right';
|
||||
default:
|
||||
return assertNever(history);
|
||||
}
|
||||
]
|
||||
*/
|
||||
}
|
||||
|
||||
function assertNever(value: never): never {
|
||||
throw new Error(`Unhandled history value: ${value}`);
|
||||
}
|
||||
```
|
||||
|
||||
## Tokenisation Strategies
|
||||
|
||||
Reconcile offers different approaches to split text for merging:
|
||||
`reconcile-text` offers different approaches to split text for merging:
|
||||
|
||||
- **Word tokeniser** (`"Word"`) — Splits on word boundaries (recommended for prose)
|
||||
- **Character tokeniser** (`"Character"`) — Individual characters (fine-grained control)
|
||||
- **Line tokeniser** (`"Line"`) — Line-by-line (similar to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file))
|
||||
- **Word tokeniser** (`"Word"`) - Splits on word boundaries (recommended for prose)
|
||||
- **Character tokeniser** (`"Character"`) - Individual characters (fine-grained control)
|
||||
- **Line tokeniser** (`"Line"`) - Line-by-line (similar to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file))
|
||||
- **Markdown tokeniser** (`"Markdown"`) - Splits on Markdown structural boundaries (headings, list items, paragraphs)
|
||||
|
||||
```typescript
|
||||
import { reconcile, type BuiltinTokenizer } from 'reconcile-text';
|
||||
|
||||
const tokenizers = [
|
||||
'Word',
|
||||
'Character',
|
||||
'Line',
|
||||
'Markdown',
|
||||
] as const satisfies readonly BuiltinTokenizer[];
|
||||
|
||||
const result = reconcile('abc', 'axc', 'abyc', 'Character');
|
||||
console.log(result.text); // "axyc"
|
||||
|
||||
for (const tokenizer of tokenizers) {
|
||||
const merged = reconcile(
|
||||
'# Title\n\n- old item\n',
|
||||
'# Title\n\n- old item\n- left item\n',
|
||||
'# New title\n\n- old item\n',
|
||||
tokenizer
|
||||
);
|
||||
|
||||
console.log(tokenizer, merged.text);
|
||||
}
|
||||
```
|
||||
|
||||
## Cursor Tracking
|
||||
|
||||
Reconcile automatically tracks cursor positions through merges, which is handy in collaborative editors. Selections can be tracked by providing them as a pair of cursors.
|
||||
`reconcile-text` automatically tracks cursor positions through merges, which is
|
||||
useful for collaborative editors. Selections can be tracked by providing them as
|
||||
a pair of cursors.
|
||||
|
||||
```javascript
|
||||
const result = reconcile(
|
||||
'Hello world',
|
||||
{
|
||||
text: 'Hello beautiful world',
|
||||
cursors: [{ id: 1, position: 6 }], // After "Hello "
|
||||
},
|
||||
{
|
||||
text: 'Hi world',
|
||||
cursors: [{ id: 2, position: 0 }], // At the beginning
|
||||
}
|
||||
);
|
||||
```typescript
|
||||
import { reconcile, type TextWithOptionalCursors } from 'reconcile-text';
|
||||
|
||||
const left = {
|
||||
text: 'Hello beautiful world',
|
||||
cursors: [{ id: 1, position: 6 }], // After "Hello "
|
||||
} satisfies TextWithOptionalCursors;
|
||||
|
||||
const right = {
|
||||
text: 'Hi world',
|
||||
cursors: [{ id: 2, position: 0 }], // At the beginning
|
||||
} satisfies TextWithOptionalCursors;
|
||||
|
||||
const result = reconcile('Hello world', left, right);
|
||||
|
||||
// Result: "Hi beautiful world" with repositioned cursors
|
||||
console.log(result.text); // "Hi beautiful world"
|
||||
console.log(result.text); // "Hi beautiful world"
|
||||
console.log(result.cursors); // [{ id: 2, position: 0 }, { id: 1, position: 3 }]
|
||||
```
|
||||
|
||||
> The `cursors` list is sorted by character position (not IDs).
|
||||
|
||||
## Generic Helpers and Inference
|
||||
|
||||
The exported merge functions are intentionally small: they merge strings, or
|
||||
strings plus cursor metadata. In TypeScript applications, keep domain-specific
|
||||
metadata in your own typed wrappers and let inference preserve the surrounding
|
||||
shape.
|
||||
|
||||
```typescript
|
||||
import { reconcile, type BuiltinTokenizer } from 'reconcile-text';
|
||||
|
||||
type ReconciledText<T extends { text: string }> = Omit<T, 'text'> & {
|
||||
text: string;
|
||||
};
|
||||
|
||||
function reconcileDraft<TDraft extends { text: string }>(
|
||||
parent: TDraft,
|
||||
left: TDraft,
|
||||
right: TDraft,
|
||||
tokenizer?: BuiltinTokenizer
|
||||
): ReconciledText<TDraft> {
|
||||
return {
|
||||
...right,
|
||||
text: reconcile(parent.text, left.text, right.text, tokenizer).text,
|
||||
};
|
||||
}
|
||||
|
||||
interface MarkdownDraft {
|
||||
id: string;
|
||||
text: string;
|
||||
updatedAt: Date;
|
||||
}
|
||||
|
||||
const parent: MarkdownDraft = {
|
||||
id: 'intro',
|
||||
text: '# Title\n\nOld text\n',
|
||||
updatedAt: new Date('2026-01-01T00:00:00Z'),
|
||||
};
|
||||
|
||||
const left: MarkdownDraft = {
|
||||
...parent,
|
||||
text: '# Title\n\nOld text\n\n- left note\n',
|
||||
};
|
||||
|
||||
const right: MarkdownDraft = {
|
||||
...parent,
|
||||
text: '# New title\n\nOld text\n',
|
||||
};
|
||||
|
||||
const merged = reconcileDraft(parent, left, right, 'Markdown');
|
||||
// merged is inferred as { id: string; updatedAt: Date; text: string }
|
||||
```
|
||||
|
||||
Use `satisfies` for configuration objects and cursor payloads when you want
|
||||
compile-time checking without widening everything to the library interface.
|
||||
|
||||
```typescript
|
||||
import type { BuiltinTokenizer, TextWithOptionalCursors } from 'reconcile-text';
|
||||
|
||||
const mergeOptions = {
|
||||
tokenizer: 'Markdown',
|
||||
renderDeletedSpans: true,
|
||||
} satisfies {
|
||||
tokenizer: BuiltinTokenizer;
|
||||
renderDeletedSpans: boolean;
|
||||
};
|
||||
|
||||
const documentWithSelection = {
|
||||
text: 'Hello beautiful world',
|
||||
cursors: [
|
||||
{ id: 1, position: 6 },
|
||||
{ id: 2, position: 15 },
|
||||
],
|
||||
} satisfies TextWithOptionalCursors;
|
||||
```
|
||||
|
||||
## Compact Diffs
|
||||
|
||||
Generate and apply compact diff representations. `diff()` returns
|
||||
`Array<number | string>`, while `undiff()` accepts
|
||||
`Array<number | bigint | string>`, because the underlying WebAssembly layer may
|
||||
represent integer entries as `bigint`.
|
||||
|
||||
```typescript
|
||||
import { diff, undiff } from 'reconcile-text';
|
||||
|
||||
const original = 'Hello world';
|
||||
const changed = 'Hello beautiful world';
|
||||
|
||||
// Generate a compact diff
|
||||
const changes = diff(original, changed);
|
||||
console.log(changes); // [5, " beautiful world"]
|
||||
|
||||
// Reconstruct the changed text from the diff
|
||||
const reconstructed = undiff(original, changes);
|
||||
console.assert(reconstructed === changed);
|
||||
```
|
||||
|
||||
Diff entries are positive integers (retain N characters), negative integers
|
||||
(delete N characters), and strings (insert text).
|
||||
|
||||
## Complete Example
|
||||
|
||||
For a complete browser example that renders `SpanWithHistory` values and cursor
|
||||
selections, see the [example website source](../examples/website/src/index.ts).
|
||||
|
|
|
|||
95
examples/compare-with-diff-match-patch.rs
Normal file
95
examples/compare-with-diff-match-patch.rs
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
use std::panic;
|
||||
|
||||
use diff_match_patch_rs::{Compat, DiffMatchPatch, PatchInput};
|
||||
use reconcile_text::{BuiltinTokenizer, reconcile};
|
||||
|
||||
fn dmp_merge(parent: &str, left: &str, right: &str) -> Option<String> {
|
||||
let parent = parent.to_owned();
|
||||
let left = left.to_owned();
|
||||
let right = right.to_owned();
|
||||
|
||||
// diff-match-patch-rs can panic on some inputs, so we catch that.
|
||||
panic::catch_unwind(|| {
|
||||
let dmp = DiffMatchPatch::new();
|
||||
let diffs = dmp.diff_main::<Compat>(&parent, &left).ok()?;
|
||||
let patches = dmp
|
||||
.patch_make(PatchInput::new_text_diffs(&parent, &diffs))
|
||||
.ok()?;
|
||||
let (result, _) = dmp.patch_apply(&patches, &right).ok()?;
|
||||
Some(result)
|
||||
})
|
||||
.ok()
|
||||
.flatten()
|
||||
}
|
||||
|
||||
fn try_merge(parent: &str, left: &str, right: &str) {
|
||||
let dmp_result = dmp_merge(parent, left, right);
|
||||
|
||||
let reconcile_result = reconcile(
|
||||
parent,
|
||||
&left.into(),
|
||||
&right.into(),
|
||||
&*BuiltinTokenizer::Word,
|
||||
)
|
||||
.apply()
|
||||
.text();
|
||||
|
||||
println!("Parent: {parent:?}");
|
||||
println!("Left: {left:?}");
|
||||
println!("Right: {right:?}");
|
||||
println!();
|
||||
match dmp_result {
|
||||
Some(r) => println!("diff-match-patch: {r:?}"),
|
||||
None => println!("diff-match-patch: <panic or error>"),
|
||||
}
|
||||
println!("reconcile-text: {reconcile_result:?}");
|
||||
println!();
|
||||
}
|
||||
|
||||
/// Demonstrates cases where diff-match-patch silently produces incorrect
|
||||
/// output, while reconcile-text preserves both users' edits correctly
|
||||
///
|
||||
/// Run it with:
|
||||
/// `cargo run --example compare-with-diff-match-patch`
|
||||
fn main() {
|
||||
// Example 1
|
||||
// Two users edit the same short phrase. Alice replaces "old(!)" with
|
||||
// "new improved", Bob replaces "broken" with "working". These are
|
||||
// independent changes to adjacent words.
|
||||
//
|
||||
// diff-match-patch has no common ancestor, so it diffs parent → left
|
||||
// and applies the patch to right. The character-level patches overlap
|
||||
// and produce garbled text ("impovind"). It reports success.
|
||||
//
|
||||
// reconcile-text sees both changes relative to the parent and merges
|
||||
// them cleanly.
|
||||
|
||||
println!("── Example 1: adjacent edits ──");
|
||||
try_merge(
|
||||
"old(!) broken code",
|
||||
"new improved code",
|
||||
"old(!) working code",
|
||||
);
|
||||
|
||||
// Example 2
|
||||
// Alice adds a sentence. Bob rewrites the surrounding text. Because
|
||||
// diff-match-patch works without a common ancestor, Alice's entire
|
||||
// sentence is silently lost.
|
||||
|
||||
println!("── Example 2: sentence lost ──");
|
||||
// Alice adds a sentence in the middle of a paragraph. Bob rephrases
|
||||
// the same paragraph. Because the patch context from Alice's edit no
|
||||
// longer appears in Bob's version, diff-match-patch silently drops
|
||||
// Alice's entire sentence.
|
||||
//
|
||||
// reconcile-text understands both edits relative to the common ancestor
|
||||
// and keeps both.
|
||||
try_merge(
|
||||
"We used the existing parsing approach for processing. The output was saved to the \
|
||||
database.",
|
||||
"We used the existing parsing approach for processing. Always validate the schema! The \
|
||||
output was saved to the database.",
|
||||
"We adopted a brand new analysis pipeline for execution. The results were written to \
|
||||
cloud storage.",
|
||||
);
|
||||
}
|
||||
38
examples/merge_file.py
Normal file
38
examples/merge_file.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""Merge three versions of a file: mine, base, and theirs.
|
||||
|
||||
A trivial version of git merge-file (https://git-scm.com/docs/git-merge-file).
|
||||
|
||||
Run it with:
|
||||
uv run --directory reconcile-python \
|
||||
python ../examples/merge_file.py my.txt base.txt their.txt [output.txt]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from reconcile_text import reconcile
|
||||
|
||||
|
||||
def main() -> None:
    """Merge the files named on the command line and emit the result.

    Expects three or four command-line arguments, in this order: ``mine``,
    ``base``, ``theirs`` and an optional ``output`` path. The three inputs are
    read as UTF-8 and passed to ``reconcile`` as ``(base, mine, theirs)``. The
    merged text is written to ``output`` (as UTF-8) when given, otherwise it is
    printed to stdout without an extra trailing newline.

    Exits with status 1 and a usage message on stderr when the number of
    arguments is wrong.
    """
    args = sys.argv[1:]

    if not 3 <= len(args) <= 4:
        print("Usage: merge_file.py <mine> <base> <theirs> [output]", file=sys.stderr)
        sys.exit(1)

    # Pin the encoding: the platform default is locale-dependent and can
    # mis-decode (or fail on) UTF-8 files on non-UTF-8 locales.
    mine = Path(args[0]).read_text(encoding="utf-8")
    base = Path(args[1]).read_text(encoding="utf-8")
    theirs = Path(args[2]).read_text(encoding="utf-8")

    result = reconcile(base, mine, theirs)
    merged = result["text"]

    if len(args) == 4:
        Path(args[3]).write_text(merged, encoding="utf-8")
    else:
        print(merged, end="")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
813
examples/website/package-lock.json
generated
813
examples/website/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -22,22 +22,22 @@
|
|||
],
|
||||
"homepage": "https://github.com/schmelczer/reconcile#readme",
|
||||
"devDependencies": {
|
||||
"copy-webpack-plugin": "^13.0.0",
|
||||
"css-loader": "^7.1.2",
|
||||
"html-webpack-plugin": "^5.6.3",
|
||||
"copy-webpack-plugin": "^14.0.0",
|
||||
"css-loader": "^7.1.4",
|
||||
"html-webpack-plugin": "^5.6.6",
|
||||
"inline-source-webpack-plugin": "^3.0.1",
|
||||
"mini-css-extract-plugin": "^2.9.2",
|
||||
"prettier": "^3.6.2",
|
||||
"mini-css-extract-plugin": "^2.10.1",
|
||||
"prettier": "^3.8.1",
|
||||
"reconcile-text": "file:../../reconcile-js",
|
||||
"resolve-url-loader": "^5.0.0",
|
||||
"sass": "^1.89.2",
|
||||
"sass-loader": "^16.0.5",
|
||||
"sass": "^1.98.0",
|
||||
"sass-loader": "^16.0.7",
|
||||
"svg-inline-loader": "^0.8.2",
|
||||
"terser-webpack-plugin": "^5.3.14",
|
||||
"ts-loader": "^9.5.2",
|
||||
"typescript": "^5.8.3",
|
||||
"webpack": "^5.99.9",
|
||||
"terser-webpack-plugin": "^5.4.0",
|
||||
"ts-loader": "^9.5.4",
|
||||
"typescript": "^5.9.3",
|
||||
"webpack": "^5.105.4",
|
||||
"webpack-cli": "^6.0.1",
|
||||
"webpack-dev-server": "^5.2.2"
|
||||
"webpack-dev-server": "^5.2.3"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,21 +8,28 @@
|
|||
/>
|
||||
<meta
|
||||
name="description"
|
||||
content="3-way text merging that automatically resolves conflicts. No more Git conflict markers — just clean, merged results."
|
||||
content="3-way text merging that automatically resolves conflicts. No more Git conflict markers - just clean, merged results."
|
||||
/>
|
||||
<meta property="og:title" content="3-Way Text Merge" />
|
||||
<meta
|
||||
property="og:description"
|
||||
content="3-way text merging that automatically resolves conflicts. No more Git conflict markers — just clean, merged results."
|
||||
content="3-way text merging that automatically resolves conflicts. No more Git conflict markers - just clean, merged results."
|
||||
/>
|
||||
<meta property="og:type" content="website" />
|
||||
<meta property="og:url" content="https://schmelczer.dev/reconcile" />
|
||||
<meta property="og:image" content="/og-image.png" />
|
||||
<meta property="og:image" content="https://schmelczer.dev/reconcile/og-image.png" />
|
||||
<meta property="og:image:width" content="1200" />
|
||||
<meta property="og:image:height" content="630" />
|
||||
<meta name="twitter:card" content="summary_large_image" />
|
||||
<link rel="icon" type="image/x-icon" href="favicon.ico" />
|
||||
<title>reconcile-text: conflict-free 3-way text merging</title>
|
||||
<link inline inline-asset="index.css" inline-asset-delete />
|
||||
<script
|
||||
defer
|
||||
data-domain="reconcile"
|
||||
data-api="https://stats.schmelczer.dev/status"
|
||||
src="https://stats.schmelczer.dev/js/script.outbound-links.js"
|
||||
></script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="background"></div>
|
||||
|
|
@ -79,7 +86,7 @@
|
|||
>documentation</a
|
||||
>
|
||||
or try editing the text boxes below to see <code>reconcile-text</code> in
|
||||
action. Use the tokenisation options to experiment with different approaches—
|
||||
action. Use the tokenisation options to experiment with different approaches -
|
||||
the Rust library also supports custom tokenisers.
|
||||
</p>
|
||||
</header>
|
||||
|
|
@ -101,13 +108,7 @@
|
|||
</div>
|
||||
</label>
|
||||
<label class="radio-option">
|
||||
<input
|
||||
type="radio"
|
||||
name="tokenizer"
|
||||
value="Word"
|
||||
id="tokenizer-word"
|
||||
checked
|
||||
/>
|
||||
<input type="radio" name="tokenizer" value="Word" id="tokenizer-word" />
|
||||
<span class="radio-custom" aria-hidden="true"></span>
|
||||
<div class="radio-content">
|
||||
<span class="radio-label">Word</span>
|
||||
|
|
@ -115,13 +116,17 @@
|
|||
</div>
|
||||
</label>
|
||||
<label class="radio-option">
|
||||
<input type="radio" name="tokenizer" value="Line" id="tokenizer-line" />
|
||||
<input
|
||||
type="radio"
|
||||
name="tokenizer"
|
||||
value="Markdown"
|
||||
id="tokenizer-markdown"
|
||||
checked
|
||||
/>
|
||||
<span class="radio-custom" aria-hidden="true"></span>
|
||||
<div class="radio-content">
|
||||
<span class="radio-label">Line</span>
|
||||
<span class="radio-description"
|
||||
>Line-by-line, like <code>git merge</code></span
|
||||
>
|
||||
<span class="radio-label">Markdown</span>
|
||||
<span class="radio-description">Preserve formatting</span>
|
||||
</div>
|
||||
</label>
|
||||
</div>
|
||||
|
|
@ -139,7 +144,7 @@
|
|||
<div class="text-area-card diamond-left">
|
||||
<label
|
||||
for="left"
|
||||
title="First user's edits — changes from this box appear in green in the result."
|
||||
title="First user's edits - changes from this box appear in green in the result."
|
||||
>
|
||||
First user's edits
|
||||
<div class="box Left"></div>
|
||||
|
|
@ -150,7 +155,7 @@
|
|||
<div class="text-area-card diamond-right">
|
||||
<label
|
||||
for="right"
|
||||
title="Second user's edits — changes from this box appear in blue in the result."
|
||||
title="Second user's edits - changes from this box appear in blue in the result."
|
||||
>
|
||||
Second user's edits
|
||||
<div class="box Right"></div>
|
||||
|
|
@ -161,7 +166,7 @@
|
|||
<div class="text-area-card diamond-result">
|
||||
<label
|
||||
for="merged"
|
||||
title="The automatically merged result — edit the boxes above to see changes in real-time."
|
||||
title="The automatically merged result - edit the boxes above to see changes in real-time."
|
||||
>
|
||||
Merged result
|
||||
<svg
|
||||
|
|
@ -184,34 +189,85 @@
|
|||
<path d="M3 3l18 18"></path>
|
||||
</svg>
|
||||
</label>
|
||||
<div id="merged" role="textbox" aria-readonly="true" aria-live="polite"></div>
|
||||
<div id="merged" role="textbox" aria-readonly="true" aria-live="off"></div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
<p>2025 Andras Schmelczer</p>
|
||||
<a
|
||||
href="https://github.com/schmelczer/reconcile"
|
||||
class="github-link"
|
||||
aria-label="GitHub repository"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
<p>© 2025-2026 András Schmelczer</p>
|
||||
<div class="footer-links">
|
||||
<a
|
||||
href="https://www.npmjs.com/package/reconcile-text"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
aria-label="npm package"
|
||||
>
|
||||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||||
<path
|
||||
d="M9 19c-4.3 1.4 -4.3 -2.5 -6 -3m12 5v-3.5c0 -1 .1 -1.4 -.5 -2c2.8 -.3 5.5 -1.4 5.5 -6a4.6 4.6 0 0 0 -1.3 -3.2a4.2 4.2 0 0 0 -.1 -3.2s-1.1 -.3 -3.5 1.3a12.3 12.3 0 0 0 -6.2 0c-2.4 -1.6 -3.5 -1.3 -3.5 -1.3a4.2 4.2 0 0 0 -.1 3.2a4.6 4.6 0 0 0 -1.3 3.2c0 4.6 2.7 5.7 5.5 6c-.6 .6 -.6 1.2 -.5 2v3.5"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
>
|
||||
<path
|
||||
d="M1.763 0C.786 0 0 .786 0 1.763v20.474C0 23.214.786 24 1.763 24h20.474c.977 0 1.763-.786 1.763-1.763V1.763C24 .786 23.214 0 22.237 0zM5.13 5.323l13.837.019-.009 13.836h-3.464l.01-10.382h-3.456L12.04 19.17H5.113z"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
<a
|
||||
href="https://pypi.org/project/reconcile-text/"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
aria-label="PyPI package"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
>
|
||||
<path
|
||||
d="M23.922 13.58v3.912L20.55 18.72l-.078.055.052.037 3.45-1.256.026-.036v-3.997l-.053-.036-.025.092zM23.621 5.618l-3.04 1.107v3.912l3.339-1.215V5.509zM23.92 13.457V9.544l-3.336 1.215v3.913zM20.47 14.71V10.8L17.17 12v3.913zM17.034 19.996v-3.912l-3.313 1.206v3.912zM17.17 16.057v3.868l3.314-1.206V14.85l-3.314 1.206zm2.093 1.882c-.367.134-.663-.074-.663-.463s.296-.814.663-.947c.365-.133.662.075.662.464s-.297.814-.662.946zM13.225 9.315l.365-.132-3.285-1.197-3.323 1.21.102.037 3.184 1.16zM20.507 10.664V6.751L17.17 7.965v3.913zM17.058 11.918V8.005l-3.302 1.202v3.912zM13.643 9.246l-3.336 1.215v3.913l3.336-1.215zM6.907 13.165l3.322 1.209v-3.913L6.907 9.252zM10.34 7.873l3.281 1.193V5.198l-3.28-1.193zM20.507 2.715L17.19 3.922v3.913l3.317-1.207zM16.95 3.903L13.724 2.73l-3.269 1.19 3.225 1.174zM15.365 4.606l-1.624.592v3.868l3.317-1.207V3.991l-1.693.615zm-.391 2.778c-.367.134-.662-.074-.662-.464s.295-.813.662-.946c.366-.133.663.074.663.464s-.297.813-.663.946zM10.229 18.41v-3.914l-3.322-1.209V17.2zM13.678 17.182v-3.913l-3.371 1.227v3.913zM13.756 17.154l3.3-1.2V12.04l-3.3 1.2zM13.678 21.217l-3.371 1.227v-3.912h-.078v3.912l-3.322-1.209v-3.913l-.053-.058-.025-.06-3.336-1.21v-3.948l.034.013 3.287 1.196.015-.078-3.261-1.187 3.26-1.187v-.109L3.876 9.62l-.307-.112 3.26-1.188v.877l.079-.055V6.769l3.257 1.185.058-.061L7.084 6.75l-.102-.037 3.24-1.179v-.083L6.854 6.677v.018l-.025.018v1.523L3.44 9.47v.02l-.025.017v4.007l-3.39 1.233v.019L0 14.784v3.995l.025.037 3.4 1.237.008-.006.007.01 3.4 1.238.008-.006.006.01 3.4 1.237.014-.009.012.01 3.45-1.256.026-.037-.078-.027zM3.493 9.563l3.257 1.185-3.257 1.187V9.562zM3.4 19.96L.078 18.752v-3.913l2.361.86.96.349v3.913zm.015-3.99L.335 14.85l-.182-.066 3.262-1.187v2.374zm3.399 5.231l-3.321-1.209v-3.912l3.321 1.209v3.912zM23.791 5.434l-3.21-1.17v2.338zM20.387 2.643l-3.24-1.18-3.27 1.19 3.247 1.182z"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
<a
|
||||
href="https://crates.io/crates/reconcile-text"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
aria-label="crates.io crate"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 24 24"
|
||||
fill="currentColor"
|
||||
>
|
||||
<path
|
||||
d="M23.8346 11.7033l-1.0073-.6236a13.7268 13.7268 0 00-.0283-.2936l.8656-.8069a.3483.3483 0 00-.1154-.578l-1.1066-.414a8.4958 8.4958 0 00-.087-.2856l.6904-.9587a.3462.3462 0 00-.2257-.5446l-1.1663-.1894a9.3574 9.3574 0 00-.1407-.2622l.49-1.0761a.3437.3437 0 00-.0274-.3361.3486.3486 0 00-.3006-.154l-1.1845.0416a6.7444 6.7444 0 00-.1873-.2268l.2723-1.153a.3472.3472 0 00-.417-.4172l-1.1532.2724a14.0183 14.0183 0 00-.2278-.1873l.0415-1.1845a.3442.3442 0 00-.49-.328l-1.076.491c-.0872-.0476-.1742-.0952-.2623-.1407l-.1903-1.1673A.3483.3483 0 0016.256.955l-.9597.6905a8.4867 8.4867 0 00-.2855-.086l-.414-1.1066a.3483.3483 0 00-.5781-.1154l-.8069.8666a9.2936 9.2936 0 00-.2936-.0284L12.2946.1683a.3462.3462 0 00-.5892 0l-.6236 1.0073a13.7383 13.7383 0 00-.2936.0284L9.9803.3374a.3462.3462 0 00-.578.1154l-.4141 1.1065c-.0962.0274-.1903.0567-.2855.086L7.744.955a.3483.3483 0 00-.5447.2258L7.009 2.348a9.3574 9.3574 0 00-.2622.1407l-1.0762-.491a.3462.3462 0 00-.49.328l.0416 1.1845a7.9826 7.9826 0 00-.2278.1873L3.8413 3.425a.3472.3472 0 00-.4171.4171l.2713 1.1531c-.0628.075-.1255.1509-.1863.2268l-1.1845-.0415a.3462.3462 0 00-.328.49l.491 1.0761a9.167 9.167 0 00-.1407.2622l-1.1662.1894a.3483.3483 0 00-.2258.5446l.6904.9587a13.303 13.303 0 00-.087.2855l-1.1065.414a.3483.3483 0 00-.1155.5781l.8656.807a9.2936 9.2936 0 00-.0283.2935l-1.0073.6236a.3442.3442 0 000 .5892l1.0073.6236c.008.0982.0182.1964.0283.2936l-.8656.8079a.3462.3462 0 00.1155.578l1.1065.4141c.0273.0962.0567.1914.087.2855l-.6904.9587a.3452.3452 0 00.2268.5447l1.1662.1893c.0456.088.0922.1751.1408.2622l-.491 1.0762a.3462.3462 0 00.328.49l1.1834-.0415c.0618.0769.1235.1528.1873.2277l-.2713 1.1541a.3462.3462 0 00.4171.4161l1.153-.2713c.075.0638.151.1255.2279.1863l-.0415 1.1845a.3442.3442 0 00.49.327l1.0761-.49c.087.0486.1741.0951.2622.1407l.1903 1.1662a.3483.3483 0 00.5447.2268l.9587-.6904a9.299 9.299 0 00.2855.087l.414 1.1066a.3452.3452 0 00.5781.1154l.8079-.8656c.0972.0111.1954.0203.2936.0294l.6236 1.0073a.3472.3472 0 00.5892 
0l.6236-1.0073c.0982-.0091.1964-.0183.2936-.0294l.8069.8656a.3483.3483 0 00.578-.1154l.4141-1.1066a8.4626 8.4626 0 00.2855-.087l.9587.6904a.3452.3452 0 00.5447-.2268l.1903-1.1662c.088-.0456.1751-.0931.2622-.1407l1.0762.49a.3472.3472 0 00.49-.327l-.0415-1.1845a6.7267 6.7267 0 00.2267-.1863l1.1531.2713a.3472.3472 0 00.4171-.416l-.2713-1.1542c.0628-.0749.1255-.1508.1863-.2278l1.1845.0415a.3442.3442 0 00.328-.49l-.49-1.076c.0475-.0872.0951-.1742.1407-.2623l1.1662-.1893a.3483.3483 0 00.2258-.5447l-.6904-.9587.087-.2855 1.1066-.414a.3462.3462 0 00.1154-.5781l-.8656-.8079c.0101-.0972.0202-.1954.0283-.2936l1.0073-.6236a.3442.3442 0 000-.5892zm-6.7413 8.3551a.7138.7138 0 01.2986-1.396.714.714 0 11-.2997 1.396zm-.3422-2.3142a.649.649 0 00-.7715.5l-.3573 1.6685c-1.1035.501-2.3285.7795-3.6193.7795a8.7368 8.7368 0 01-3.6951-.814l-.3574-1.6684a.648.648 0 00-.7714-.499l-1.473.3158a8.7216 8.7216 0 01-.7613-.898h7.1676c.081 0 .1356-.0141.1356-.088v-2.536c0-.074-.0536-.0881-.1356-.0881h-2.0966v-1.6077h2.2677c.2065 0 1.1065.0587 1.394 1.2088.0901.3533.2875 1.5044.4232 1.8729.1346.413.6833 1.2381 1.2685 1.2381h3.5716a.7492.7492 0 00.1296-.0131 8.7874 8.7874 0 01-.8119.9526zM6.8369 20.024a.714.714 0 11-.2997-1.396.714.714 0 01.2997 1.396zM4.1177 8.9972a.7137.7137 0 11-1.304.5791.7137.7137 0 011.304-.579zm-.8352 1.9813l1.5347-.6824a.65.65 0 00.33-.8585l-.3158-.7147h1.2432v5.6025H3.5669a8.7753 8.7753 0 01-.2834-3.348zm6.7343-.5437V8.7836h2.9601c.153 0 1.0792.1772 1.0792.8697 0 .575-.7107.7815-1.2948.7815zm10.7574 1.4862c0 .2187-.008.4363-.0243.651h-.9c-.09 0-.1265.0586-.1265.1477v.413c0 .973-.5487 1.1846-1.0296 1.2382-.4576.0517-.9648-.1913-1.0275-.4717-.2704-1.5186-.7198-1.8436-1.4305-2.4034.8817-.5599 1.799-1.386 1.799-2.4915 0-1.1936-.819-1.9458-1.3769-2.3153-.7825-.5163-1.6491-.6195-1.883-.6195H5.4682a8.7651 8.7651 0 014.907-2.7699l1.0974 1.151a.648.648 0 00.9182.0213l1.227-1.1743a8.7753 8.7753 0 016.0044 4.2762l-.8403 1.8982a.652.652 0 
00.33.8585l1.6178.7188c.0283.2875.0425.577.0425.8717zm-9.3006-9.5993a.7128.7128 0 11.984 1.0316.7137.7137 0 01-.984-1.0316zm8.3389 6.71a.7107.7107 0 01.9395-.3625.7137.7137 0 11-.9405.3635z"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
<a
|
||||
href="https://github.com/schmelczer/reconcile"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
aria-label="GitHub repository"
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="2"
|
||||
stroke-linecap="round"
|
||||
stroke-linejoin="round"
|
||||
>
|
||||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||||
<path
|
||||
d="M9 19c-4.3 1.4 -4.3 -2.5 -6 -3m12 5v-3.5c0 -1 .1 -1.4 -.5 -2c2.8 -.3 5.5 -1.4 5.5 -6a4.6 4.6 0 0 0 -1.3 -3.2a4.2 4.2 0 0 0 -.1 -3.2s-1.1 -.3 -3.5 1.3a12.3 12.3 0 0 0 -6.2 0c-2.4 -1.6 -3.5 -1.3 -3.5 -1.3a4.2 4.2 0 0 0 -.1 3.2a4.6 4.6 0 0 0 -1.3 3.2c0 4.6 2.7 5.7 5.5 6c-.6 .6 -.6 1.2 -.5 2v3.5"
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { reconcile, reconcileWithHistory } from 'reconcile-text';
|
||||
import { reconcileWithHistory } from 'reconcile-text';
|
||||
import type { BuiltinTokenizer } from 'reconcile-text';
|
||||
import './style.scss';
|
||||
|
||||
|
|
@ -10,41 +10,44 @@ const tokenizerRadios = document.querySelectorAll(
|
|||
'input[name="tokenizer"]'
|
||||
) as NodeListOf<HTMLInputElement>;
|
||||
|
||||
const sampleText = `The "reconcile-text" Rust library is embedded on this page as a WASM module and powers these text boxes. Experiment with changing the "Original", "First user's edit", and "Second user's edit" text boxes to see competing changes get merged in real-time within the "Merged result" box. Here, you will see color-coded tokens marking the origin of each token, including ones that got deleted. The result highly depends on the tokenisation strategy, for example, deciding how casing or whitespace is taken into account.`;
|
||||
const sampleText = `The reconcile-text library is embedded on this page as a WASM module and powers these text boxes. Experiment with changing the "Original", "First user's edit", and "Second user's edit" text boxes to see competing changes get merged in real-time within the "Merged result" box.
|
||||
|
||||
Here, you will see color-coded tokens marking the origin of each token, including ones that got deleted. The result highly depends on the tokenisation strategy which may be:
|
||||
- Character-based
|
||||
- Word-based`;
|
||||
|
||||
let pendingUpdate: number | null = null;
|
||||
function scheduleUpdate(): void {
|
||||
if (pendingUpdate === null) {
|
||||
pendingUpdate = requestAnimationFrame(() => {
|
||||
pendingUpdate = null;
|
||||
updateMergedText();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
originalTextArea.addEventListener('input', updateMergedText);
|
||||
leftTextArea.addEventListener('input', updateMergedText);
|
||||
rightTextArea.addEventListener('input', updateMergedText);
|
||||
originalTextArea.addEventListener('input', scheduleUpdate);
|
||||
leftTextArea.addEventListener('input', scheduleUpdate);
|
||||
rightTextArea.addEventListener('input', scheduleUpdate);
|
||||
|
||||
leftTextArea.addEventListener('selectionchange', updateMergedText);
|
||||
rightTextArea.addEventListener('selectionchange', updateMergedText);
|
||||
leftTextArea.addEventListener('select', updateMergedText);
|
||||
rightTextArea.addEventListener('select', updateMergedText);
|
||||
|
||||
console.info(
|
||||
reconcile(
|
||||
'Hello world',
|
||||
{
|
||||
text: 'Hello beautiful world',
|
||||
cursors: [{ id: 1, position: 6 }], // After "Hello "
|
||||
},
|
||||
{
|
||||
text: 'Hi world',
|
||||
cursors: [{ id: 2, position: 0 }], // At the beginning
|
||||
}
|
||||
)
|
||||
);
|
||||
document.addEventListener('selectionchange', () => {
|
||||
if (
|
||||
document.activeElement === leftTextArea ||
|
||||
document.activeElement === rightTextArea
|
||||
) {
|
||||
scheduleUpdate();
|
||||
}
|
||||
});
|
||||
|
||||
window.addEventListener('resize', resizeTextAreas);
|
||||
|
||||
tokenizerRadios.forEach((radio) => {
|
||||
radio.addEventListener('change', updateMergedText);
|
||||
radio.addEventListener('change', scheduleUpdate);
|
||||
});
|
||||
|
||||
loadSample();
|
||||
updateMergedText();
|
||||
focusTextArea(leftTextArea);
|
||||
}
|
||||
|
||||
// Edit the instructions to generate example edits
|
||||
|
|
@ -52,10 +55,10 @@ function loadSample(): void {
|
|||
originalTextArea.value = sampleText;
|
||||
leftTextArea.value =
|
||||
sampleText.replace('color', 'colour') +
|
||||
" Check out what's the most complex conflict you can come up with!";
|
||||
rightTextArea.value = sampleText
|
||||
.replace(', for example,', ' such as')
|
||||
.replace('WASM', 'WebAssembly');
|
||||
"\n- Line-based\n\nCheck out what's the most complex conflict you can come up with!";
|
||||
rightTextArea.value =
|
||||
sampleText.replace(', for example,', ' such as').replace('WASM', 'WebAssembly') +
|
||||
'\n- Or your custom tokeniser';
|
||||
}
|
||||
|
||||
function updateMergedText(): void {
|
||||
|
|
@ -84,7 +87,7 @@ function updateMergedText(): void {
|
|||
|
||||
let selectionStart: number = Number.NEGATIVE_INFINITY;
|
||||
let selectionEnd: number = Number.NEGATIVE_INFINITY;
|
||||
if (results.cursors?.length ?? 0 > 0) {
|
||||
if ((results.cursors?.length ?? 0) > 0) {
|
||||
selectionStart = results.cursors![0].position;
|
||||
selectionEnd = results.cursors![1].position;
|
||||
}
|
||||
|
|
@ -99,28 +102,48 @@ function updateMergedText(): void {
|
|||
}
|
||||
|
||||
for (const { text, history } of results.history) {
|
||||
const isDelete = history === 'RemovedFromLeft' || history === 'RemovedFromRight';
|
||||
let spanChars: string[] = [];
|
||||
let currentClass = '';
|
||||
|
||||
const flushSpan = () => {
|
||||
if (spanChars.length > 0) {
|
||||
const span = document.createElement('span');
|
||||
span.className = currentClass;
|
||||
span.textContent = spanChars.join('');
|
||||
fragment.appendChild(span);
|
||||
spanChars = [];
|
||||
}
|
||||
};
|
||||
|
||||
for (const character of text) {
|
||||
const span = document.createElement('span');
|
||||
span.className = history;
|
||||
span.textContent = character;
|
||||
|
||||
if (selectionStart <= currentPosition && currentPosition < selectionEnd) {
|
||||
span.className += ` selection-${selectionSide}`;
|
||||
let className = history;
|
||||
if (
|
||||
!isDelete &&
|
||||
selectionStart <= currentPosition &&
|
||||
currentPosition < selectionEnd
|
||||
) {
|
||||
className += ` selection-${selectionSide}`;
|
||||
}
|
||||
|
||||
fragment.appendChild(span);
|
||||
|
||||
if (currentPosition === selectionEnd - 1) {
|
||||
fragment.appendChild(
|
||||
createSelectionOverlay(selectionSide === 'left', isSelection)
|
||||
);
|
||||
if (className !== currentClass) {
|
||||
flushSpan();
|
||||
currentClass = className;
|
||||
}
|
||||
spanChars.push(character);
|
||||
|
||||
if (history !== 'RemovedFromLeft' && history !== 'RemovedFromRight') {
|
||||
// Only increment currentPosition for non-removed characters
|
||||
if (!isDelete) {
|
||||
if (currentPosition === selectionEnd - 1) {
|
||||
flushSpan();
|
||||
fragment.appendChild(
|
||||
createSelectionOverlay(selectionSide === 'left', isSelection)
|
||||
);
|
||||
}
|
||||
currentPosition++;
|
||||
}
|
||||
}
|
||||
|
||||
flushSpan();
|
||||
}
|
||||
|
||||
mergedTextArea.innerHTML = '';
|
||||
|
|
@ -171,7 +194,7 @@ function createSelectionOverlay(isLeft: boolean, isSelection: boolean): HTMLSpan
|
|||
|
||||
function getSelectedTokenizer(): BuiltinTokenizer {
|
||||
const selectedRadio = Array.from(tokenizerRadios).find((radio) => radio.checked);
|
||||
return selectedRadio?.value as BuiltinTokenizer;
|
||||
return (selectedRadio?.value ?? 'Markdown') as BuiltinTokenizer;
|
||||
}
|
||||
|
||||
function resizeTextAreas(): void {
|
||||
|
|
@ -188,10 +211,8 @@ function autoResize(textarea: HTMLTextAreaElement): void {
|
|||
textarea.style.height = textarea.scrollHeight + 'px';
|
||||
}
|
||||
|
||||
function focusTextArea(textarea: HTMLTextAreaElement): void {
|
||||
textarea.focus();
|
||||
textarea.selectionStart = 0;
|
||||
textarea.selectionEnd = 0;
|
||||
}
|
||||
|
||||
main();
|
||||
main().catch((error) => {
|
||||
document.body.textContent =
|
||||
'Failed to load the application. Please ensure your browser supports WebAssembly.';
|
||||
console.error(error);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -479,27 +479,29 @@ $DOT_RADIUS: 4;
|
|||
}
|
||||
|
||||
footer {
|
||||
padding: 16px;
|
||||
padding: 32px 16px;
|
||||
width: 100%;
|
||||
position: relative;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 24px;
|
||||
color: $text-secondary;
|
||||
}
|
||||
|
||||
.github-link > svg {
|
||||
position: absolute;
|
||||
.footer-links {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.footer-links > a > svg {
|
||||
color: $text-secondary;
|
||||
top: 50%;
|
||||
right: 36px;
|
||||
transform: translateY(-50%);
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
|
||||
.github-link > svg:hover {
|
||||
.footer-links > a > svg:hover {
|
||||
cursor: pointer;
|
||||
transform: translateY(-50%) scale(1.15);
|
||||
transform: scale(1.15);
|
||||
}
|
||||
|
|
|
|||
1858
reconcile-js/package-lock.json
generated
1858
reconcile-js/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "reconcile-text",
|
||||
"version": "0.5.0",
|
||||
"version": "0.11.0",
|
||||
"description": "Intelligent 3-way text merging with automated conflict resolution",
|
||||
"main": "dist/reconcile.node.js",
|
||||
"browser": "dist/reconcile.web.js",
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
"homepage": "https://schmelczer.dev/reconcile/",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/schmelczer/reconcile.git"
|
||||
"url": "git+https://github.com/schmelczer/reconcile.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/schmelczer/reconcile/issues",
|
||||
|
|
@ -37,14 +37,14 @@
|
|||
},
|
||||
"devDependencies": {
|
||||
"@types/jest": "^30.0.0",
|
||||
"jest": "^30.0.4",
|
||||
"prettier": "^3.6.2",
|
||||
"jest": "^30.3.0",
|
||||
"prettier": "^3.8.1",
|
||||
"reconcile-text": "file:../pkg",
|
||||
"ts-jest": "^29.4.0",
|
||||
"ts-loader": "^9.5.2",
|
||||
"ts-jest": "^29.4.6",
|
||||
"ts-loader": "^9.5.4",
|
||||
"tslib": "2.8.1",
|
||||
"typescript": "5.8.3",
|
||||
"webpack": "^5.99.9",
|
||||
"typescript": "5.9.3",
|
||||
"webpack": "^5.105.4",
|
||||
"webpack-cli": "^6.0.1",
|
||||
"webpack-merge": "^6.0.1"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,21 @@
|
|||
import { reconcile, reconcileWithHistory } from './index';
|
||||
import { reconcile, reconcileWithHistory, diff, undiff } from './index';
|
||||
import { installWasmLeakDetector, checkForWasmLeaks } from './wasm-leak-detector';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
installWasmLeakDetector();
|
||||
|
||||
afterEach(() => {
|
||||
const leaks = checkForWasmLeaks();
|
||||
if (leaks.length > 0) {
|
||||
throw new Error(
|
||||
`WASM memory leak: ${leaks.length} object(s) not freed:\n ${leaks.join('\n ')}`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
describe('reconcile', () => {
|
||||
it('call reconcile without cursors', () => {
|
||||
|
|
@ -44,3 +61,35 @@ describe('reconcile', () => {
|
|||
expect(result.history.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('test_diff_and_undiff_are_inverse', () => {
|
||||
const resourcesPath = path.join(__dirname, '../../tests/resources');
|
||||
|
||||
const readFileSlice = (fileName: string, start: number, end: number): string => {
|
||||
const filePath = path.join(resourcesPath, fileName);
|
||||
const content = fs.readFileSync(filePath, 'utf-8');
|
||||
const chars = Array.from(content); // Handle unicode properly
|
||||
return chars.slice(start, Math.min(end, chars.length)).join('');
|
||||
};
|
||||
|
||||
const files = ['pride_and_prejudice.txt', 'room_with_a_view.txt', 'blns.txt'];
|
||||
|
||||
const ranges = [{ start: 0, end: 50000 }];
|
||||
|
||||
files.forEach((file1) => {
|
||||
files.forEach((file2) => {
|
||||
ranges.forEach((range1) => {
|
||||
ranges.forEach((range2) => {
|
||||
it(`should diff & undiff ${file1}[${range1.start}..${range1.end}], ${file2}[${range2.start}..${range2.end}] without panic`, () => {
|
||||
const content1 = readFileSlice(file1, range1.start, range1.end);
|
||||
const content2 = readFileSlice(file2, range2.start, range2.end);
|
||||
|
||||
const changes = diff(content1, content2);
|
||||
const actual = undiff(content1, changes);
|
||||
expect(actual).toEqual(content2);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,14 +4,15 @@ import {
|
|||
TextWithCursors as wasmTextWithCursors,
|
||||
SpanWithHistory as wasmSpanWithHistory,
|
||||
reconcileWithHistory as wasmReconcileWithHistory,
|
||||
isBinary as wasmIsBinary,
|
||||
diff as wasmDiff,
|
||||
undiff as wasmUndiff,
|
||||
initSync,
|
||||
} from 'reconcile-text';
|
||||
|
||||
import wasmBytes from 'reconcile-text/reconcile_text_bg.wasm';
|
||||
|
||||
// Define the enum values as const arrays to avoid duplication
|
||||
const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Word'] as const;
|
||||
const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Markdown', 'Word'] as const;
|
||||
const HISTORY_VALUES = [
|
||||
'Unchanged',
|
||||
'AddedFromLeft',
|
||||
|
|
@ -53,11 +54,8 @@ export interface TextWithCursors {
|
|||
}
|
||||
|
||||
/**
|
||||
* Represents a text document with associated cursor positions.
|
||||
*
|
||||
* This interface is used both as input to reconcile functions (to specify where
|
||||
* cursors are positioned in the original documents) and as output (with cursors
|
||||
* automatically repositioned after merging).
|
||||
* Like `TextWithCursors`, but cursors may be null or undefined (treated as empty).
|
||||
* Used as input where cursor tracking is optional.
|
||||
*/
|
||||
export interface TextWithOptionalCursors {
|
||||
/** The document's entire content as a string */
|
||||
|
|
@ -96,7 +94,7 @@ export interface TextWithCursorsAndHistory {
|
|||
text: string;
|
||||
|
||||
/**
|
||||
* Array of cursor positions within the merged text. Can empty if there are no cursors to track.
|
||||
* Array of cursor positions within the merged text. Can be empty if there are no cursors to track.
|
||||
* All cursors are automatically repositioned from the left and right documents.
|
||||
*/
|
||||
cursors: CursorPosition[];
|
||||
|
|
@ -123,9 +121,9 @@ export interface SpanWithHistory {
|
|||
history: History;
|
||||
}
|
||||
|
||||
const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer. Only ${BUILTIN_TOKENIZERS.join(
|
||||
const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer, only ${BUILTIN_TOKENIZERS.join(
|
||||
', '
|
||||
)} are supported.`;
|
||||
)} are supported`;
|
||||
|
||||
let isInitialised = false;
|
||||
|
||||
|
|
@ -179,10 +177,69 @@ export function reconcile(
|
|||
return jsResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a compact diff representation between an original and changed text.
|
||||
*
|
||||
* These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff.
|
||||
* Cursor positions are omitted from the diff result.
|
||||
*
|
||||
* This function computes the differences between two versions of text and returns
|
||||
* a compact representation of those changes.
|
||||
*
|
||||
* @param original - The original/base version of the text
|
||||
* @param changed - The modified version of the text (either string or TextWithCursors with cursor positions)
|
||||
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||
* @returns An array of inserts (strings), deletes (negative integers), and retained spans (positive integers).
|
||||
*/
|
||||
export function diff(
|
||||
original: string,
|
||||
changed: string | TextWithOptionalCursors,
|
||||
tokenizer: BuiltinTokenizer = 'Word'
|
||||
): Array<number | string> {
|
||||
init();
|
||||
|
||||
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
|
||||
}
|
||||
|
||||
const changedWasm = toWasmTextWithCursors(changed);
|
||||
|
||||
const result = wasmDiff(original, changedWasm, tokenizer);
|
||||
|
||||
changedWasm.free();
|
||||
|
||||
return result.map((item) => (typeof item === 'bigint' ? Number(item) : item));
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies a compact diff to an original text to reconstruct the changed version.
|
||||
*
|
||||
* This function takes an original text and a compact diff representation (as produced
|
||||
* by the `diff` function) and reconstructs the modified text.
|
||||
*
|
||||
* @param original - The original/base version of the text
|
||||
* @param diff - The compact diff array (inserts as strings, deletes as negative integers, retained spans as positive integers)
|
||||
* @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`.
|
||||
* @returns The reconstructed changed text as a string.
|
||||
*/
|
||||
export function undiff(
|
||||
original: string,
|
||||
diff: Array<number | bigint | string>,
|
||||
tokenizer: BuiltinTokenizer = 'Word'
|
||||
): string {
|
||||
init();
|
||||
|
||||
if (!BUILTIN_TOKENIZERS.includes(tokenizer)) {
|
||||
throw new Error(UNSUPPORTED_TOKENIZER_ERROR);
|
||||
}
|
||||
|
||||
return wasmUndiff(original, diff, tokenizer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges three versions of text and returns detailed provenance information.
|
||||
*
|
||||
* This function behaves identically to `reconcile()` but additionally provides
|
||||
* This function behaves like `reconcile()` but also provides
|
||||
* detailed historical information about the origin of each text span in the result.
|
||||
* This is valuable for understanding how the merge was performed and which changes
|
||||
* came from which source.
|
||||
|
|
@ -237,19 +294,6 @@ export function reconcileWithHistory(
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check (using heuristics) if the given data is binary or text content.
|
||||
*
|
||||
* Only text inputs can be reconciled using the library's functions.
|
||||
*
|
||||
* @param data - The data to check for binary content. This should be a Uint8Array.
|
||||
* @returns True if the data is likely binary, false if it is likely text.
|
||||
*/
|
||||
export function isBinary(data: Uint8Array): boolean {
|
||||
init();
|
||||
return wasmIsBinary(data);
|
||||
}
|
||||
|
||||
function init() {
|
||||
if (isInitialised) {
|
||||
return;
|
||||
|
|
@ -278,9 +322,15 @@ function toWasmCursorPosition({ id, position }: CursorPosition): wasmCursorPosit
|
|||
}
|
||||
|
||||
function toTextWithCursors(textWithCursor: wasmTextWithCursors): TextWithCursors {
|
||||
const wasmCursors = textWithCursor.cursors();
|
||||
const cursors = wasmCursors.map(toCursorPosition);
|
||||
for (const cursor of wasmCursors) {
|
||||
cursor.free();
|
||||
}
|
||||
|
||||
return {
|
||||
text: textWithCursor.text(),
|
||||
cursors: textWithCursor.cursors().map(toCursorPosition),
|
||||
cursors,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -291,9 +341,11 @@ function toCursorPosition(cursor: wasmCursorPosition): CursorPosition {
|
|||
};
|
||||
}
|
||||
|
||||
function toSpanWithHistory(textWithHistory: wasmSpanWithHistory): SpanWithHistory {
|
||||
return {
|
||||
text: textWithHistory.text(),
|
||||
history: textWithHistory.history(),
|
||||
function toSpanWithHistory(span: wasmSpanWithHistory): SpanWithHistory {
|
||||
const result = {
|
||||
text: span.text(),
|
||||
history: span.history(),
|
||||
};
|
||||
span.free();
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
63
reconcile-js/src/wasm-leak-detector.ts
Normal file
63
reconcile-js/src/wasm-leak-detector.ts
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/**
|
||||
* Test utility for detecting WASM memory leaks.
|
||||
*
|
||||
* wasm-bindgen registers every JS-side object with a `FinalizationRegistry`.
|
||||
* This detector patches `FinalizationRegistry.prototype.register` to collect
|
||||
* references to all WASM objects. After each test, {@link checkForWasmLeaks}
|
||||
* inspects `__wbg_ptr` on every tracked object - a non-zero pointer means
|
||||
* `.free()` was never called, i.e. a leak.
|
||||
*
|
||||
* Install once (before any WASM calls) and call {@link checkForWasmLeaks}
|
||||
* in an `afterEach` hook.
|
||||
*/
|
||||
|
||||
let trackedObjects: object[] = [];
|
||||
let originalRegister: Function | null = null;
|
||||
|
||||
interface WasmBindgenObject {
|
||||
__wbg_ptr: number;
|
||||
constructor: { name?: string };
|
||||
}
|
||||
|
||||
function isWasmBindgenObject(target: unknown): target is WasmBindgenObject {
|
||||
return (
|
||||
target !== null &&
|
||||
typeof target === 'object' &&
|
||||
'__wbg_ptr' in (target as Record<string, unknown>)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Patches `FinalizationRegistry.prototype.register` to track all wasm-bindgen
|
||||
* objects. Safe to call multiple times (idempotent).
|
||||
*/
|
||||
export function installWasmLeakDetector(): void {
|
||||
if (originalRegister) return;
|
||||
|
||||
originalRegister = FinalizationRegistry.prototype.register;
|
||||
|
||||
FinalizationRegistry.prototype.register = function (
|
||||
target: object,
|
||||
heldValue: unknown,
|
||||
unregisterToken?: object
|
||||
) {
|
||||
if (isWasmBindgenObject(target)) {
|
||||
trackedObjects.push(target);
|
||||
}
|
||||
return originalRegister!.call(this, target, heldValue, unregisterToken);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns any tracked WASM objects whose `__wbg_ptr` is still non-zero
|
||||
* (i.e. `.free()` was never called). Clears the tracked set afterwards.
|
||||
*/
|
||||
export function checkForWasmLeaks(): string[] {
|
||||
const leaks = trackedObjects
|
||||
.filter(isWasmBindgenObject)
|
||||
.filter((obj) => obj.__wbg_ptr !== 0)
|
||||
.map((obj) => `${obj.constructor?.name ?? 'Unknown'} (ptr=${obj.__wbg_ptr})`);
|
||||
|
||||
trackedObjects = [];
|
||||
return leaks;
|
||||
}
|
||||
|
|
@ -9,6 +9,5 @@
|
|||
"declarationDir": "./dist/types",
|
||||
"skipLibCheck": true,
|
||||
"inlineSourceMap": true
|
||||
},
|
||||
"exclude": ["./dist", "**/*.test.ts"]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
10
reconcile-python/.gitignore
vendored
Normal file
10
reconcile-python/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
.venv/
|
||||
.pytest_cache/
|
||||
.ruff_cache/
|
||||
__pycache__/
|
||||
*.egg-info/
|
||||
*.so
|
||||
*.dylib
|
||||
*.dSYM/
|
||||
dist/
|
||||
README.md
|
||||
161
reconcile-python/Cargo.lock
generated
Normal file
161
reconcile-python/Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.183"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"once_cell",
|
||||
"portable-atomic",
|
||||
"pyo3-build-config",
|
||||
"pyo3-ffi",
|
||||
"pyo3-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7"
|
||||
dependencies = [
|
||||
"target-lexicon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.28.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"pyo3-build-config",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reconcile-text"
|
||||
version = "0.11.0"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reconcile-text-python"
|
||||
version = "0.11.0"
|
||||
dependencies = [
|
||||
"pyo3",
|
||||
"reconcile-text",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "target-lexicon"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
16
reconcile-python/Cargo.toml
Normal file
16
reconcile-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
|
||||
name = "reconcile-text-python"
|
||||
version = "0.11.0"
|
||||
edition = "2024"
|
||||
rust-version = "1.94"
|
||||
authors = ["Andras Schmelczer <andras@schmelczer.dev>"]
|
||||
license = "MIT"
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
name = "_native"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
reconcile-text = { path = ".." }
|
||||
pyo3 = { version = "0.28.2", features = ["extension-module"] }
|
||||
52
reconcile-python/pyproject.toml
Normal file
52
reconcile-python/pyproject.toml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
[build-system]
|
||||
requires = ["maturin>=1.0,<2.0"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[project]
|
||||
name = "reconcile-text"
|
||||
version = "0.11.0"
|
||||
description = "Intelligent 3-way text merging with automated conflict resolution"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [{ name = "Andras Schmelczer", email = "andras@schmelczer.dev" }]
|
||||
requires-python = ">=3.9"
|
||||
classifiers = [
|
||||
"Programming Language :: Rust",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Typing :: Typed",
|
||||
]
|
||||
keywords = ["merge", "OT", "CRDT", "3-way", "diff", "text"]
|
||||
|
||||
[dependency-groups]
|
||||
dev = ["maturin>=1.0,<2.0", "pytest>=8", "ruff>=0.15", "pyright>=1"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://schmelczer.dev/reconcile"
|
||||
Repository = "https://github.com/schmelczer/reconcile"
|
||||
Issues = "https://github.com/schmelczer/reconcile/issues"
|
||||
|
||||
[tool.maturin]
|
||||
manifest-path = "Cargo.toml"
|
||||
module-name = "reconcile_text._native"
|
||||
python-source = "python"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py39"
|
||||
line-length = 100
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = ["reconcile_text"]
|
||||
|
||||
[tool.pyright]
|
||||
pythonVersion = "3.9"
|
||||
typeCheckingMode = "strict"
|
||||
include = ["python", "tests"]
|
||||
165
reconcile-python/python/reconcile_text/__init__.py
Normal file
165
reconcile-python/python/reconcile_text/__init__.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Intelligent 3-way text merging with automated conflict resolution."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal, TypedDict, Union
|
||||
|
||||
from reconcile_text._native import diff as _diff
|
||||
from reconcile_text._native import reconcile as _reconcile
|
||||
from reconcile_text._native import reconcile_with_history as _reconcile_with_history
|
||||
from reconcile_text._native import undiff as _undiff
|
||||
|
||||
BuiltinTokenizer = Literal["Character", "Line", "Markdown", "Word"]
|
||||
"""Tokenization strategy for text merging."""
|
||||
|
||||
History = Literal[
|
||||
"Unchanged", "AddedFromLeft", "AddedFromRight", "RemovedFromLeft", "RemovedFromRight"
|
||||
]
|
||||
"""Provenance label for each span in a merge result."""
|
||||
|
||||
|
||||
class CursorPosition(TypedDict):
|
||||
"""A cursor position within a text document."""
|
||||
|
||||
id: int
|
||||
"""Unique identifier for the cursor."""
|
||||
position: int
|
||||
"""Character position in the text (0-based)."""
|
||||
|
||||
|
||||
class TextWithCursors(TypedDict):
|
||||
"""A text document with associated cursor positions."""
|
||||
|
||||
text: str
|
||||
"""The document content."""
|
||||
cursors: list[CursorPosition]
|
||||
"""Cursor positions within the text."""
|
||||
|
||||
|
||||
class SpanWithHistory(TypedDict):
|
||||
"""A text span annotated with its origin in a merge result."""
|
||||
|
||||
text: str
|
||||
"""The text content of this span."""
|
||||
history: History
|
||||
"""Which source this span came from."""
|
||||
|
||||
|
||||
class TextWithCursorsAndHistory(TypedDict):
|
||||
"""A merged text document with cursor positions and change provenance."""
|
||||
|
||||
text: str
|
||||
"""The merged document content."""
|
||||
cursors: list[CursorPosition]
|
||||
"""Repositioned cursor positions."""
|
||||
history: list[SpanWithHistory]
|
||||
"""Provenance information for each text span."""
|
||||
|
||||
|
||||
TextInput = Union[str, TextWithCursors]
|
||||
"""Input type for text arguments: either a plain string or a dict with text and cursors."""
|
||||
|
||||
|
||||
def reconcile(
    parent: str,
    left: TextInput,
    right: TextInput,
    tokenizer: BuiltinTokenizer = "Word",
) -> TextWithCursors:
    """Three-way merge of ``left`` and ``right`` against their common ``parent``.

    This is a typed wrapper that forwards its arguments unchanged to the
    native ``reconcile`` implementation.

    Args:
        parent: The original text that both sides diverged from.
        left: The left edit (string or dict with "text" and "cursors").
        right: The right edit (string or dict with "text" and "cursors").
        tokenizer: Tokenization strategy. Defaults to "Word".

    Returns:
        A dict with "text" (merged string) and "cursors" (repositioned cursor list).
    """
    # The native function is typed as returning a plain dict; the ignore below
    # narrows it to the public TypedDict.
    merged = _reconcile(parent, left, right, tokenizer)
    return merged  # type: ignore[return-value]
|
||||
|
||||
|
||||
def reconcile_with_history(
    parent: str,
    left: TextInput,
    right: TextInput,
    tokenizer: BuiltinTokenizer = "Word",
) -> TextWithCursorsAndHistory:
    """Three-way merge that additionally reports where each span came from.

    Takes the same arguments as `reconcile`; the result carries an extra
    "history" entry. This is a typed wrapper that forwards its arguments
    unchanged to the native ``reconcile_with_history`` implementation.

    Args:
        parent: The original text that both sides diverged from.
        left: The left edit (string or dict with "text" and "cursors").
        right: The right edit (string or dict with "text" and "cursors").
        tokenizer: Tokenization strategy. Defaults to "Word".

    Returns:
        A dict with "text", "cursors", and "history".
    """
    # The native function is typed as returning a plain dict; the ignore below
    # narrows it to the public TypedDict.
    merged = _reconcile_with_history(parent, left, right, tokenizer)
    return merged  # type: ignore[return-value]
|
||||
|
||||
|
||||
def diff(
    parent: str,
    changed: TextInput,
    tokenizer: BuiltinTokenizer = "Word",
) -> list[Union[int, str]]:
    """Generate a compact diff between two texts.

    Returns retain counts (positive ints), delete counts (negative ints),
    and inserted strings. This is a typed wrapper that forwards its arguments
    unchanged to the native ``diff`` implementation.

    Args:
        parent: The original text.
        changed: The modified text (string or dict with "text" and "cursors").
        tokenizer: Tokenization strategy. Defaults to "Word".

    Returns:
        A list of ints and strings representing the diff.

    Raises:
        ValueError: If the diff computation overflows.
    """
    # NOTE: the element type is spelled `Union[int, str]` (as `TextInput` does)
    # rather than `int | str`, so the annotation stays valid on Python 3.9 even
    # when annotations are evaluated eagerly.
    return _diff(parent, changed, tokenizer)  # type: ignore[return-value]
|
||||
|
||||
|
||||
def undiff(
    parent: str,
    diff: list[Union[int, str]],
    tokenizer: BuiltinTokenizer = "Word",
) -> str:
    """Apply a compact diff to reconstruct the changed text.

    This is a typed wrapper that forwards its arguments unchanged to the
    native ``undiff`` implementation.

    Args:
        parent: The original text.
        diff: A list of ints and strings (as produced by `diff`).
        tokenizer: Tokenization strategy. Defaults to "Word".

    Returns:
        The reconstructed text.

    Raises:
        ValueError: If the diff format is invalid.
    """
    # NOTE: the element type is spelled `Union[int, str]` (as `TextInput` does)
    # rather than `int | str`, so the annotation stays valid on Python 3.9 even
    # when annotations are evaluated eagerly.
    return _undiff(parent, diff, tokenizer)
|
||||
|
||||
|
||||
# Public API of the package: the type aliases and TypedDicts first, then the
# four wrapper functions. The `_`-prefixed native imports are not re-exported.
__all__ = [
    "BuiltinTokenizer",
    "CursorPosition",
    "History",
    "SpanWithHistory",
    "TextInput",
    "TextWithCursors",
    "TextWithCursorsAndHistory",
    "diff",
    "reconcile",
    "reconcile_with_history",
    "undiff",
]
|
||||
24
reconcile-python/python/reconcile_text/_native.pyi
Normal file
24
reconcile-python/python/reconcile_text/_native.pyi
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from typing import Any
|
||||
|
||||
# Stub for the native `reconcile` function. `left`/`right` are typed `Any`
# and the result as a plain dict; the typed public wrapper lives in
# `reconcile_text/__init__.py`.
def reconcile(
    parent: str,
    left: Any,
    right: Any,
    tokenizer: str = "Word",
) -> dict[str, Any]: ...
|
||||
# Stub for the native `reconcile_with_history` function. Same parameters as
# `reconcile`; the result is typed as a plain dict here and narrowed by the
# public wrapper in `reconcile_text/__init__.py`.
def reconcile_with_history(
    parent: str,
    left: Any,
    right: Any,
    tokenizer: str = "Word",
) -> dict[str, Any]: ...
|
||||
# Stub for the native `diff` function. `changed` is typed `Any` here; the
# public wrapper in `reconcile_text/__init__.py` narrows it.
def diff(
    parent: str,
    changed: Any,
    tokenizer: str = "Word",
) -> list[int | str]: ...
|
||||
# Stub for the native `undiff` function: takes the parent text plus a list of
# ints and strings and returns the reconstructed text.
def undiff(
    parent: str,
    diff: list[int | str],
    tokenizer: str = "Word",
) -> str: ...
|
||||
0
reconcile-python/python/reconcile_text/py.typed
Normal file
0
reconcile-python/python/reconcile_text/py.typed
Normal file
235
reconcile-python/src/lib.rs
Normal file
235
reconcile-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyDict, PyList};
|
||||
use reconcile_text::{
|
||||
BuiltinTokenizer, CursorPosition, EditedText, NumberOrText, TextWithCursors,
|
||||
};
|
||||
|
||||
fn parse_tokenizer(tokenizer: &str) -> PyResult<BuiltinTokenizer> {
|
||||
match tokenizer {
|
||||
"Character" => Ok(BuiltinTokenizer::Character),
|
||||
"Line" => Ok(BuiltinTokenizer::Line),
|
||||
"Markdown" => Ok(BuiltinTokenizer::Markdown),
|
||||
"Word" => Ok(BuiltinTokenizer::Word),
|
||||
_ => Err(pyo3::exceptions::PyValueError::new_err(format!(
|
||||
"Unknown tokenizer '{tokenizer}', expected Character, Line, Markdown, or Word"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_text_with_cursors(input: &Bound<'_, PyAny>) -> PyResult<TextWithCursors> {
|
||||
if let Ok(text) = input.extract::<String>() {
|
||||
return Ok(TextWithCursors::from(text));
|
||||
}
|
||||
|
||||
let dict = input.cast::<PyDict>()?;
|
||||
|
||||
let text: String = dict
|
||||
.get_item("text")?
|
||||
.ok_or_else(|| pyo3::exceptions::PyKeyError::new_err("text"))?
|
||||
.extract()?;
|
||||
|
||||
let cursors = match dict.get_item("cursors")? {
|
||||
Some(obj) if !obj.is_none() => {
|
||||
let list = obj.cast::<PyList>()?;
|
||||
let mut cursors = Vec::with_capacity(list.len());
|
||||
for item in list {
|
||||
let cursor_dict = item.cast::<PyDict>()?;
|
||||
let id: usize = cursor_dict
|
||||
.get_item("id")?
|
||||
.ok_or_else(|| pyo3::exceptions::PyKeyError::new_err("id"))?
|
||||
.extract()?;
|
||||
let position: usize = cursor_dict
|
||||
.get_item("position")?
|
||||
.ok_or_else(|| pyo3::exceptions::PyKeyError::new_err("position"))?
|
||||
.extract()?;
|
||||
cursors.push(CursorPosition::new(id, position));
|
||||
}
|
||||
cursors
|
||||
}
|
||||
_ => Vec::new(),
|
||||
};
|
||||
|
||||
Ok(TextWithCursors::new(text, cursors))
|
||||
}
|
||||
|
||||
fn text_with_cursors_to_dict<'py>(
|
||||
py: Python<'py>,
|
||||
twc: &TextWithCursors,
|
||||
) -> PyResult<Bound<'py, PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("text", twc.text())?;
|
||||
|
||||
let cursors = PyList::new(
|
||||
py,
|
||||
twc.cursors().iter().map(|c| {
|
||||
let d = PyDict::new(py);
|
||||
d.set_item("id", c.id()).unwrap();
|
||||
d.set_item("position", c.char_index()).unwrap();
|
||||
d
|
||||
}),
|
||||
)?;
|
||||
dict.set_item("cursors", cursors)?;
|
||||
|
||||
Ok(dict)
|
||||
}
|
||||
|
||||
/// Merge three versions of text using conflict-free resolution.
|
||||
///
|
||||
/// Takes a parent text and two concurrent edits (left and right), returning
|
||||
/// the merged result with automatically repositioned cursors.
|
||||
///
|
||||
/// Args:
|
||||
/// parent: The original text that both sides diverged from.
|
||||
/// left: The left edit, either a string or a dict with "text" and "cursors" keys.
|
||||
/// right: The right edit, either a string or a dict with "text" and "cursors" keys.
|
||||
/// tokenizer: Tokenization strategy - "Word" (default), "Character", "Line", or "Markdown".
|
||||
///
|
||||
/// Returns:
|
||||
/// A dict with "text" (merged string) and "cursors" (list of repositioned cursors).
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (parent, left, right, tokenizer = "Word"))]
|
||||
fn reconcile<'py>(
|
||||
py: Python<'py>,
|
||||
parent: &str,
|
||||
left: &Bound<'py, PyAny>,
|
||||
right: &Bound<'py, PyAny>,
|
||||
tokenizer: &str,
|
||||
) -> PyResult<Bound<'py, PyDict>> {
|
||||
let tokenizer = parse_tokenizer(tokenizer)?;
|
||||
let left = extract_text_with_cursors(left)?;
|
||||
let right = extract_text_with_cursors(right)?;
|
||||
|
||||
let result = reconcile_text::reconcile(parent, &left, &right, &*tokenizer).apply();
|
||||
text_with_cursors_to_dict(py, &result)
|
||||
}
|
||||
|
||||
/// Merge three versions of text and return provenance history.
|
||||
///
|
||||
/// Like `reconcile`, but also returns which source each text span came from.
|
||||
///
|
||||
/// Args:
|
||||
/// parent: The original text that both sides diverged from.
|
||||
/// left: The left edit, either a string or a dict with "text" and "cursors" keys.
|
||||
/// right: The right edit, either a string or a dict with "text" and "cursors" keys.
|
||||
/// tokenizer: Tokenization strategy - "Word" (default), "Character", "Line", or "Markdown".
|
||||
///
|
||||
/// Returns:
|
||||
/// A dict with "text", "cursors", and "history" (list of dicts with "text" and "history" keys).
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (parent, left, right, tokenizer = "Word"))]
|
||||
fn reconcile_with_history<'py>(
|
||||
py: Python<'py>,
|
||||
parent: &str,
|
||||
left: &Bound<'py, PyAny>,
|
||||
right: &Bound<'py, PyAny>,
|
||||
tokenizer: &str,
|
||||
) -> PyResult<Bound<'py, PyDict>> {
|
||||
let tokenizer = parse_tokenizer(tokenizer)?;
|
||||
let left = extract_text_with_cursors(left)?;
|
||||
let right = extract_text_with_cursors(right)?;
|
||||
|
||||
let reconciled = reconcile_text::reconcile(parent, &left, &right, &*tokenizer);
|
||||
let (text_with_cursors, history_spans) = reconciled.apply_with_all();
|
||||
|
||||
let dict = text_with_cursors_to_dict(py, &text_with_cursors)?;
|
||||
|
||||
let history = PyList::new(
|
||||
py,
|
||||
history_spans.iter().map(|span| {
|
||||
let d = PyDict::new(py);
|
||||
d.set_item("text", span.text()).unwrap();
|
||||
d.set_item("history", format!("{:?}", span.history()))
|
||||
.unwrap();
|
||||
d
|
||||
}),
|
||||
)?;
|
||||
dict.set_item("history", history)?;
|
||||
|
||||
Ok(dict)
|
||||
}
|
||||
|
||||
/// Generate a compact diff between two texts.
|
||||
///
|
||||
/// Returns a list of retain counts (positive ints), delete counts (negative ints),
|
||||
/// and inserted strings.
|
||||
///
|
||||
/// Args:
|
||||
/// parent: The original text.
|
||||
/// changed: The modified text, either a string or a dict with "text" and "cursors" keys.
|
||||
/// tokenizer: Tokenization strategy - "Word" (default), "Character", "Line", or "Markdown".
|
||||
///
|
||||
/// Returns:
|
||||
/// A list of ints and strings representing the diff.
|
||||
///
|
||||
/// Raises:
|
||||
/// ValueError: If the diff computation overflows.
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (parent, changed, tokenizer = "Word"))]
|
||||
fn diff<'py>(
|
||||
py: Python<'py>,
|
||||
parent: &str,
|
||||
changed: &Bound<'py, PyAny>,
|
||||
tokenizer: &str,
|
||||
) -> PyResult<Bound<'py, PyList>> {
|
||||
let tokenizer = parse_tokenizer(tokenizer)?;
|
||||
let changed = extract_text_with_cursors(changed)?;
|
||||
|
||||
let edited = EditedText::from_strings_with_tokenizer(parent, &changed, &*tokenizer);
|
||||
let diff_result = edited
|
||||
.to_diff()
|
||||
.map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?;
|
||||
|
||||
let list = PyList::empty(py);
|
||||
for item in diff_result {
|
||||
match item {
|
||||
NumberOrText::Number(n) => list.append(n)?,
|
||||
NumberOrText::Text(s) => list.append(s)?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(list)
|
||||
}
|
||||
|
||||
/// Apply a compact diff to a parent text to reconstruct the changed version.
|
||||
///
|
||||
/// Args:
|
||||
/// parent: The original text.
|
||||
/// diff: A list of ints and strings (as produced by `diff`).
|
||||
/// tokenizer: Tokenization strategy - "Word" (default), "Character", "Line", or "Markdown".
|
||||
///
|
||||
/// Returns:
|
||||
/// The reconstructed text.
|
||||
///
|
||||
/// Raises:
|
||||
/// ValueError: If the diff format is invalid.
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (parent, diff, tokenizer = "Word"))]
|
||||
fn undiff(parent: &str, diff: &Bound<'_, PyList>, tokenizer: &str) -> PyResult<String> {
|
||||
let tokenizer = parse_tokenizer(tokenizer)?;
|
||||
|
||||
let mut parsed: Vec<NumberOrText> = Vec::with_capacity(diff.len());
|
||||
for item in diff {
|
||||
if let Ok(n) = item.extract::<i64>() {
|
||||
parsed.push(NumberOrText::Number(n));
|
||||
} else if let Ok(s) = item.extract::<String>() {
|
||||
parsed.push(NumberOrText::Text(s));
|
||||
} else {
|
||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||
"Diff items must be int or str",
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
EditedText::from_diff(parent, parsed, &*tokenizer)
|
||||
.map(|edited| edited.apply().text())
|
||||
.map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))
|
||||
}
|
||||
|
||||
// Module initialiser for the `_native` extension module: registers the four
// `#[pyfunction]`s defined in this file (`reconcile`, `reconcile_with_history`,
// `diff`, `undiff`). Any registration error is propagated to the caller.
#[pymodule]
fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(reconcile, m)?)?;
    m.add_function(wrap_pyfunction!(reconcile_with_history, m)?)?;
    m.add_function(wrap_pyfunction!(diff, m)?)?;
    m.add_function(wrap_pyfunction!(undiff, m)?)?;
    Ok(())
}
|
||||
179
reconcile-python/tests/test_reconcile.py
Normal file
179
reconcile-python/tests/test_reconcile.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from reconcile_text import diff, reconcile, reconcile_with_history, undiff
|
||||
|
||||
# Both directories are resolved relative to this test file: three `parent`
# steps up from the file itself, then into "examples" or "tests/resources".
EXAMPLES_DIR = Path(__file__).resolve().parent.parent.parent / "examples"
RESOURCES_DIR = Path(__file__).resolve().parent.parent.parent / "tests" / "resources"

# Resource files (looked up under RESOURCES_DIR) used by the parametrized
# diff/undiff roundtrip test.
FILES = ["pride_and_prejudice.txt", "room_with_a_view.txt", "blns.txt"]
|
||||
|
||||
|
||||
class TestReconcile:
    """Checks for `reconcile`: merged text, cursor output and tokenizer handling."""

    def test_basic_merge(self) -> None:
        merged = reconcile("Hello", "Hello world", "Hi world")
        assert merged["text"] == "Hi world"

    def test_three_way_merge(self) -> None:
        merged = reconcile(
            "Merging text is hard!",
            "Merging text is easy!",
            "With reconcile, merging documents is hard!",
        )
        assert merged["text"] == "With reconcile, merging documents is easy!"

    def test_with_cursors(self) -> None:
        left_edit = {"text": "Hello world", "cursors": [{"id": 3, "position": 2}]}
        right_edit = {
            "text": "Hi world",
            "cursors": [{"id": 4, "position": 0}, {"id": 5, "position": 3}],
        }
        expected_cursors = [
            {"id": 3, "position": 0},
            {"id": 4, "position": 0},
            {"id": 5, "position": 3},
        ]

        merged = reconcile("Hello", left_edit, right_edit)

        assert merged["text"] == "Hi world"
        assert merged["cursors"] == expected_cursors

    def test_character_tokenizer(self) -> None:
        merged = reconcile("abc", "axc", "abyc", "Character")
        assert merged["text"] == "axyc"

    def test_line_tokenizer(self) -> None:
        merged = reconcile(
            "line1\nline2\nline3\n",
            "line1\nmodified\nline3\n",
            "line1\nline2\nnew line\n",
            "Line",
        )
        assert merged["text"] == "line1\nmodified\nnew line\n"

    def test_empty_texts(self) -> None:
        merged = reconcile("", "", "")
        assert merged["text"] == ""
        assert merged["cursors"] == []

    def test_invalid_tokenizer(self) -> None:
        # An unrecognised tokenizer name must be rejected with ValueError.
        with pytest.raises(ValueError, match="Unknown tokenizer"):
            reconcile("a", "b", "c", "Invalid")  # type: ignore[arg-type]
|
||||
|
||||
|
||||
class TestReconcileWithHistory:
    """Checks for `reconcile_with_history`: history presence and label values."""

    def test_returns_history(self) -> None:
        merged = reconcile_with_history(
            "Merging text is hard!",
            "Merging text is easy!",
            "With reconcile, merging documents is hard!",
        )
        spans = merged["history"]

        assert merged["text"] == "With reconcile, merging documents is easy!"
        assert len(spans) > 0
        # Every span must expose both keys.
        assert all("text" in span and "history" in span for span in spans)

    def test_history_values(self) -> None:
        allowed_labels = {
            "Unchanged",
            "AddedFromLeft",
            "AddedFromRight",
            "RemovedFromLeft",
            "RemovedFromRight",
        }
        merged = reconcile_with_history("Hello", "Hello world", "Hi")
        for span in merged["history"]:
            assert span["history"] in allowed_labels
|
||||
|
||||
|
||||
class TestDiff:
    """Checks on the shape of the list returned by `diff`."""

    def test_basic_diff(self) -> None:
        ops = diff("Hello world", "Hello beautiful world")
        assert isinstance(ops, list)
        assert all(isinstance(op, (int, str)) for op in ops)

    def test_no_change(self) -> None:
        ops = diff("same text", "same text")
        # Identical inputs: only positive ints (retains) may appear.
        assert all(isinstance(op, int) and op > 0 for op in ops)
|
||||
|
||||
|
||||
class TestUndiff:
    """Checks that `undiff` inverts `diff` and rejects a malformed diff."""

    def test_roundtrip(self) -> None:
        before = "Hello world"
        after = "Hello beautiful world"

        assert undiff(before, diff(before, after)) == after

    def test_empty_roundtrip(self) -> None:
        ops = diff("", "")
        assert undiff("", ops) == ""

    def test_invalid_diff(self) -> None:
        # A count of 100 against the 5-character parent must raise ValueError.
        with pytest.raises(ValueError):
            undiff("short", [100])
|
||||
|
||||
|
||||
class TestExamples:
    """Runs the `merge_file.py` example script in a subprocess and checks its output."""

    @staticmethod
    def _run_merge_example(
        workdir: Path, *extra_args: str
    ) -> subprocess.CompletedProcess[str]:
        """Write the three input files into `workdir` and run the example on them.

        The script receives the paths in the order mine, base, theirs, followed
        by any `extra_args`. A non-zero exit status raises (``check=True``).
        """
        (workdir / "base.txt").write_text("Hello world")
        (workdir / "mine.txt").write_text("Hello beautiful world")
        (workdir / "theirs.txt").write_text("Hi world")

        command = [
            sys.executable,
            str(EXAMPLES_DIR / "merge_file.py"),
            str(workdir / "mine.txt"),
            str(workdir / "base.txt"),
            str(workdir / "theirs.txt"),
            *extra_args,
        ]
        return subprocess.run(command, capture_output=True, text=True, check=True)

    def test_merge_file_stdout(self, tmp_path: Path) -> None:
        # Without an output argument the merged text is expected on stdout.
        completed = self._run_merge_example(tmp_path)
        assert completed.stdout == "Hi beautiful world"

    def test_merge_file_output_file(self, tmp_path: Path) -> None:
        # With a fourth path argument the merged text is expected in that file.
        target = tmp_path / "output.txt"
        self._run_merge_example(tmp_path, str(target))
        assert target.read_text() == "Hi beautiful world"
|
||||
|
||||
|
||||
class TestDiffUndiffInverse:
    """Verify diff/undiff roundtrip across large real-world texts."""

    @pytest.mark.parametrize("file1", FILES)
    @pytest.mark.parametrize("file2", FILES)
    def test_roundtrip_files(self, file1: str, file2: str) -> None:
        """Diff every ordered pair of resource files and undo the diff again.

        Only the first 50000 characters of each file are used.
        """
        # Decode explicitly as UTF-8: without `encoding`, `read_text` uses the
        # locale's preferred encoding, which makes the test platform-dependent.
        # NOTE(review): assumes the resource files are stored as UTF-8 - confirm.
        content1 = (RESOURCES_DIR / file1).read_text(encoding="utf-8")[:50000]
        content2 = (RESOURCES_DIR / file2).read_text(encoding="utf-8")[:50000]

        changes = diff(content1, content2)
        actual = undiff(content1, changes)

        assert actual == content2
|
||||
279
reconcile-python/uv.lock
generated
Normal file
279
reconcile-python/uv.lock
generated
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.9"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.10'",
|
||||
"python_full_version < '3.10'",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
resolution-markers = [
|
||||
"python_full_version < '3.10'",
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.10'",
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maturin"
|
||||
version = "1.12.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "tomli", marker = "python_full_version < '3.11'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0c/18/8b2eebd3ea086a5ec73d7081f95ec64918ceda1900075902fc296ea3ad55/maturin-1.12.6.tar.gz", hash = "sha256:d37be3a811a7f2ee28a0fa0964187efa50e90f21da0c6135c27787fa0b6a89db", size = 269165, upload-time = "2026-03-01T14:54:04.21Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/71/8b/9ddfde8a485489e3ebdc50ee3042ef1c854f00dfea776b951068f6ffe451/maturin-1.12.6-py3-none-linux_armv6l.whl", hash = "sha256:6892b4176992fcc143f9d1c1c874a816e9a041248eef46433db87b0f0aff4278", size = 9789847, upload-time = "2026-03-01T14:54:09.172Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/e8/5f7fd3763f214a77ac0388dbcc71cc30aec5490016bd0c8e6bd729fc7b0a/maturin-1.12.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:c0c742beeeef7fb93b6a81bd53e75507887e396fd1003c45117658d063812dad", size = 19023833, upload-time = "2026-03-01T14:53:46.743Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/7f/706ff3839c8b2046436d4c2bc97596c558728264d18abc298a1ad862a4be/maturin-1.12.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2cb41139295eed6411d3cdafc7430738094c2721f34b7eeb44f33cac516115dc", size = 9821620, upload-time = "2026-03-01T14:54:12.04Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/9c/70917fb123c8dd6b595e913616c9c72d730cbf4a2b6cac8077dc02a12586/maturin-1.12.6-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:351f3af1488a7cbdcff3b6d8482c17164273ac981378a13a4a9937a49aec7d71", size = 9849107, upload-time = "2026-03-01T14:53:48.971Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/ea/f1d6ad95c0a12fbe761a7c28a57540341f188564dbe8ad730a4d1788cd32/maturin-1.12.6-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:6dbddfe4dc7ddee60bbac854870bd7cfec660acb54d015d24597d59a1c828f61", size = 10242855, upload-time = "2026-03-01T14:53:44.605Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/1b/2419843a4f1d2fb4747f3dc3d9c4a2881cd97a3274dd94738fcdf0835e79/maturin-1.12.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:8fdb0f63e77ee3df0f027a120e9af78dbc31edf0eb0f263d55783c250c33b728", size = 9674972, upload-time = "2026-03-01T14:53:52.763Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/46/b60ab2fc996d904b40e55bd475599dcdccd8f7ad3e649bf95e87970df466/maturin-1.12.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:fa84b7493a2e80759cacc2e668fa5b444d55b9994e90707c42904f55d6322c1e", size = 9645755, upload-time = "2026-03-01T14:53:58.497Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/96/03f2b55a8c226805115232fc23c4a4f33f0c9d39e11efab8166dc440f80d/maturin-1.12.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:e90dc12bc6a38e9495692a36c9e231c4d7e0c9bfde60719468ab7d8673db3c45", size = 12737612, upload-time = "2026-03-01T14:54:05.393Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/c2/648667022c5b53cdccefa67c245e8a984970f3045820f00c2e23bdb2aff4/maturin-1.12.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06fc8d089f98623ce924c669b70911dfed30f9a29956c362945f727f9abc546b", size = 10455028, upload-time = "2026-03-01T14:54:07.349Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/63/d6/5b5efe3ca0c043357ed3f8d2b2d556169fdbf1ff75e50e8e597708a359d2/maturin-1.12.6-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:75133e56274d43b9227fd49dca9a86e32f1fd56a7b55544910c4ce978c2bb5aa", size = 10014531, upload-time = "2026-03-01T14:53:54.548Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/d5/39c594c27b1a8b32a0cb95fff9ad60b888c4352d1d1c389ac1bd20dc1e16/maturin-1.12.6-py3-none-win32.whl", hash = "sha256:3f32e0a3720b81423c9d35c14e728cb1f954678124749776dc72d533ea1115e8", size = 8553012, upload-time = "2026-03-01T14:53:50.706Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/66/b262832a91747e04051e21f986bd01a8af81fbffafacc7d66a11e79aab5f/maturin-1.12.6-py3-none-win_amd64.whl", hash = "sha256:977290159d252db946054a0555263c59b3d0c7957135c69e690f4b1558ee9983", size = 9890470, upload-time = "2026-03-01T14:53:56.659Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/47/76b8ca470ddc8d7d36aa8c15f5a6aed1841806bb93a0f4ead8ee61e9a088/maturin-1.12.6-py3-none-win_arm64.whl", hash = "sha256:bae91976cdc8148038e13c881e1e844e5c63e58e026e8b9945aa2d19b3b4ae89", size = 8606158, upload-time = "2026-03-01T14:54:02.423Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodeenv"
|
||||
version = "1.10.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "26.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.6.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.19.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyright"
|
||||
version = "1.1.408"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nodeenv" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578, upload-time = "2026-01-08T08:07:38.795Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144, upload-time = "2026-01-08T08:07:37.082Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "8.4.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
resolution-markers = [
|
||||
"python_full_version < '3.10'",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" },
|
||||
{ name = "exceptiongroup", marker = "python_full_version < '3.10'" },
|
||||
{ name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||
{ name = "packaging", marker = "python_full_version < '3.10'" },
|
||||
{ name = "pluggy", marker = "python_full_version < '3.10'" },
|
||||
{ name = "pygments", marker = "python_full_version < '3.10'" },
|
||||
{ name = "tomli", marker = "python_full_version < '3.10'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "9.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.10'",
|
||||
]
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" },
|
||||
{ name = "exceptiongroup", marker = "python_full_version == '3.10.*'" },
|
||||
{ name = "iniconfig", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
|
||||
{ name = "packaging", marker = "python_full_version >= '3.10'" },
|
||||
{ name = "pluggy", marker = "python_full_version >= '3.10'" },
|
||||
{ name = "pygments", marker = "python_full_version >= '3.10'" },
|
||||
{ name = "tomli", marker = "python_full_version == '3.10.*'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reconcile-text"
|
||||
version = "0.11.0"
|
||||
source = { editable = "." }
|
||||
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "maturin" },
|
||||
{ name = "pyright" },
|
||||
{ name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||
{ name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
|
||||
{ name = "ruff" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
{ name = "maturin", specifier = ">=1.0,<2.0" },
|
||||
{ name = "pyright", specifier = ">=1" },
|
||||
{ name = "pytest", specifier = ">=8" },
|
||||
{ name = "ruff", specifier = ">=0.15" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.15.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/77/9b/840e0039e65fcf12758adf684d2289024d6140cde9268cc59887dc55189c/ruff-0.15.5.tar.gz", hash = "sha256:7c3601d3b6d76dce18c5c824fc8d06f4eef33d6df0c21ec7799510cde0f159a2", size = 4574214, upload-time = "2026-03-05T20:06:34.946Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/47/20/5369c3ce21588c708bcbe517a8fbe1a8dfdb5dfd5137e14790b1da71612c/ruff-0.15.5-py3-none-linux_armv6l.whl", hash = "sha256:4ae44c42281f42e3b06b988e442d344a5b9b72450ff3c892e30d11b29a96a57c", size = 10478185, upload-time = "2026-03-05T20:06:29.093Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/ed/e81dd668547da281e5dce710cf0bc60193f8d3d43833e8241d006720e42b/ruff-0.15.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6edd3792d408ebcf61adabc01822da687579a1a023f297618ac27a5b51ef0080", size = 10859201, upload-time = "2026-03-05T20:06:32.632Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/8f/533075f00aaf19b07c5cd6aa6e5d89424b06b3b3f4583bfa9c640a079059/ruff-0.15.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:89f463f7c8205a9f8dea9d658d59eff49db05f88f89cc3047fb1a02d9f344010", size = 10184752, upload-time = "2026-03-05T20:06:40.312Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/0e/ba49e2c3fa0395b3152bad634c7432f7edfc509c133b8f4529053ff024fb/ruff-0.15.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba786a8295c6574c1116704cf0b9e6563de3432ac888d8f83685654fe528fd65", size = 10534857, upload-time = "2026-03-05T20:06:19.581Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/71/39234440f27a226475a0659561adb0d784b4d247dfe7f43ffc12dd02e288/ruff-0.15.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd4b801e57955fe9f02b31d20375ab3a5c4415f2e5105b79fb94cf2642c91440", size = 10309120, upload-time = "2026-03-05T20:06:00.435Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/87/4140aa86a93df032156982b726f4952aaec4a883bb98cb6ef73c347da253/ruff-0.15.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391f7c73388f3d8c11b794dbbc2959a5b5afe66642c142a6effa90b45f6f5204", size = 11047428, upload-time = "2026-03-05T20:05:51.867Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/f7/4953e7e3287676f78fbe85e3a0ca414c5ca81237b7575bdadc00229ac240/ruff-0.15.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dc18f30302e379fe1e998548b0f5e9f4dff907f52f73ad6da419ea9c19d66c8", size = 11914251, upload-time = "2026-03-05T20:06:22.887Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/46/0f7c865c10cf896ccf5a939c3e84e1cfaeed608ff5249584799a74d33835/ruff-0.15.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc6e7f90087e2d27f98dc34ed1b3ab7c8f0d273cc5431415454e22c0bd2a681", size = 11333801, upload-time = "2026-03-05T20:05:57.168Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/01/a10fe54b653061585e655f5286c2662ebddb68831ed3eaebfb0eb08c0a16/ruff-0.15.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1cb7169f53c1ddb06e71a9aebd7e98fc0fea936b39afb36d8e86d36ecc2636a", size = 11206821, upload-time = "2026-03-05T20:06:03.441Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/0d/2132ceaf20c5e8699aa83da2706ecb5c5dcdf78b453f77edca7fb70f8a93/ruff-0.15.5-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:9b037924500a31ee17389b5c8c4d88874cc6ea8e42f12e9c61a3d754ff72f1ca", size = 11133326, upload-time = "2026-03-05T20:06:25.655Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/72/cb/2e5259a7eb2a0f87c08c0fe5bf5825a1e4b90883a52685524596bfc93072/ruff-0.15.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:65bb414e5b4eadd95a8c1e4804f6772bbe8995889f203a01f77ddf2d790929dd", size = 10510820, upload-time = "2026-03-05T20:06:37.79Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/20/b67ce78f9e6c59ffbdb5b4503d0090e749b5f2d31b599b554698a80d861c/ruff-0.15.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d20aa469ae3b57033519c559e9bc9cd9e782842e39be05b50e852c7c981fa01d", size = 10302395, upload-time = "2026-03-05T20:05:54.504Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/e5/719f1acccd31b720d477751558ed74e9c88134adcc377e5e886af89d3072/ruff-0.15.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:15388dd28c9161cdb8eda68993533acc870aa4e646a0a277aa166de9ad5a8752", size = 10754069, upload-time = "2026-03-05T20:06:06.422Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/9c/d1db14469e32d98f3ca27079dbd30b7b44dbb5317d06ab36718dee3baf03/ruff-0.15.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b30da330cbd03bed0c21420b6b953158f60c74c54c5f4c1dabbdf3a57bf355d2", size = 11304315, upload-time = "2026-03-05T20:06:10.867Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/3a/950367aee7c69027f4f422059227b290ed780366b6aecee5de5039d50fa8/ruff-0.15.5-py3-none-win32.whl", hash = "sha256:732e5ee1f98ba5b3679029989a06ca39a950cced52143a0ea82a2102cb592b74", size = 10551676, upload-time = "2026-03-05T20:06:13.705Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/00/bf077a505b4e649bdd3c47ff8ec967735ce2544c8e4a43aba42ee9bf935d/ruff-0.15.5-py3-none-win_amd64.whl", hash = "sha256:821d41c5fa9e19117616c35eaa3f4b75046ec76c65e7ae20a333e9a8696bc7fe", size = 11678972, upload-time = "2026-03-05T20:06:45.379Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fe/4e/cd76eca6db6115604b7626668e891c9dd03330384082e33662fb0f113614/ruff-0.15.5-py3-none-win_arm64.whl", hash = "sha256:b498d1c60d2fe5c10c45ec3f698901065772730b411f164ae270bb6bfcc4740b", size = 10965572, upload-time = "2026-03-05T20:06:16.984Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.4.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
||||
]
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
[toolchain]
|
||||
channel = "nightly-2025-06-06"
|
||||
channel = "1.94.0"
|
||||
targets = [ "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl" ]
|
||||
profile = "default"
|
||||
|
|
|
|||
|
|
@ -1,8 +1 @@
|
|||
imports_granularity = "crate"
|
||||
condense_wildcard_suffixes = true
|
||||
fn_single_line = true
|
||||
format_strings = true
|
||||
reorder_impl_items = true
|
||||
group_imports = "StdExternalCrate"
|
||||
use_field_init_shorthand = true
|
||||
wrap_comments=true
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@
|
|||
|
||||
set -e
|
||||
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
wasm-pack build --target web --features wasm
|
||||
|
||||
cd reconcile-js
|
||||
npm ci
|
||||
npm run build
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
set -e
|
||||
|
||||
git pull --rebase
|
||||
|
||||
if [[ -z $1 ]]; then
|
||||
echo "Usage: $0 {patch|minor|major}"
|
||||
exit 1
|
||||
|
|
@ -23,14 +25,29 @@ else
|
|||
fi
|
||||
|
||||
echo "Bumping versions"
|
||||
|
||||
which cargo-set-version || cargo install cargo-edit
|
||||
cargo set-version --bump $1
|
||||
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
|
||||
wasm-pack build --target web --features wasm
|
||||
|
||||
cd reconcile-js
|
||||
npm version $1
|
||||
npm install
|
||||
|
||||
cd -
|
||||
NEWVER=$(grep '^version = ' ../Cargo.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
|
||||
cd ../reconcile-python
|
||||
sed -i '' "s/^version = \".*\"/version = \"$NEWVER\"/" Cargo.toml
|
||||
sed -i '' "s/^version = \".*\"/version = \"$NEWVER\"/" pyproject.toml
|
||||
cargo update --workspace
|
||||
uv lock
|
||||
|
||||
cd ../examples/website
|
||||
npm install
|
||||
|
||||
cd ../..
|
||||
|
||||
git add .
|
||||
TAG=$(node -p "require('./reconcile-js/package.json').version")
|
||||
|
|
|
|||
|
|
@ -3,7 +3,11 @@
|
|||
set -e
|
||||
|
||||
wasm-pack build --target web --features wasm
|
||||
|
||||
cd reconcile-js
|
||||
npm install
|
||||
npm run build
|
||||
|
||||
cd ../examples/website
|
||||
npm install
|
||||
npm run start
|
||||
|
|
|
|||
|
|
@ -2,13 +2,26 @@
|
|||
|
||||
set -e
|
||||
|
||||
which cargo-machete || cargo install cargo-machete
|
||||
cargo machete
|
||||
|
||||
cargo clippy --all-targets --all-features --fix --allow-dirty --allow-staged
|
||||
cargo fmt --all
|
||||
|
||||
cd reconcile-js
|
||||
npm ci
|
||||
npm run format
|
||||
|
||||
cd ../examples/website
|
||||
npm ci
|
||||
npm run format
|
||||
|
||||
cd ../../reconcile-python
|
||||
cp ../README.md .
|
||||
uv run maturin develop -q
|
||||
uv run ruff check python/ tests/
|
||||
uv run ruff format python/ tests/
|
||||
uv run pyright python/ tests/
|
||||
cd -
|
||||
|
||||
echo "Success!"
|
||||
|
|
|
|||
|
|
@ -2,15 +2,35 @@
|
|||
|
||||
set -e
|
||||
|
||||
wasm-pack build --target web --features wasm
|
||||
cargo test --verbose -- --include-ignored
|
||||
which cargo-insta || cargo install cargo-insta
|
||||
which wasm-pack || cargo install wasm-pack
|
||||
|
||||
node_version=$(node --version | cut -d'.' -f1 | tr -d 'v')
|
||||
if [ "$node_version" != "22" ]; then
|
||||
echo "Error: Node.js version 22 is required, but found version $node_version"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
wasm-pack build --target web --features wasm,console_error_panic_hook
|
||||
cargo test --verbose --features serde -- --include-ignored
|
||||
|
||||
cargo test
|
||||
cargo test --features serde
|
||||
cargo test --features wasm
|
||||
wasm-pack test --node --features wasm
|
||||
cargo test --features all
|
||||
|
||||
wasm-pack test --node --features wasm,console_error_panic_hook
|
||||
|
||||
cd reconcile-js
|
||||
npm install
|
||||
npm ci
|
||||
npm run build
|
||||
npm run test
|
||||
cd -
|
||||
|
||||
cd reconcile-python
|
||||
cp ../README.md .
|
||||
uv run maturin develop
|
||||
uv run pytest -v
|
||||
cd -
|
||||
|
||||
echo "Success!"
|
||||
|
|
|
|||
62
src/lib.rs
62
src/lib.rs
|
|
@ -59,7 +59,7 @@
|
|||
//!
|
||||
//! For specialised use cases, such as structured languages, custom
|
||||
//! tokenisation logic can be implemented by providing a function with the
|
||||
//! signature `Fn(&str) -> Vec<Token<String>>`::
|
||||
//! signature `Fn(&str) -> Vec<Token<String>>`:
|
||||
//!
|
||||
//! ```
|
||||
//! use reconcile_text::{reconcile, Token, BuiltinTokenizer};
|
||||
|
|
@ -100,11 +100,11 @@
|
|||
//! let parent = "Hello world";
|
||||
//! let left = TextWithCursors::new(
|
||||
//! "Hello beautiful world".to_string(),
|
||||
//! vec![CursorPosition { id: 1, char_index: 6 }] // After "Hello "
|
||||
//! vec![CursorPosition::new(1, 6)] // After "Hello "
|
||||
//! );
|
||||
//! let right = TextWithCursors::new(
|
||||
//! "Hi world".to_string(),
|
||||
//! vec![CursorPosition { id: 2, char_index: 0 }] // At the beginning
|
||||
//! vec![CursorPosition::new(2, 0)] // At the beginning
|
||||
//! );
|
||||
//!
|
||||
//! let result = reconcile(parent, &left, &right, &*BuiltinTokenizer::Word);
|
||||
|
|
@ -152,11 +152,58 @@
|
|||
//! );
|
||||
//! ```
|
||||
//!
|
||||
//! ## Compact change serialization
|
||||
//!
|
||||
//! The edits can be serialized into a compact representation without the full
|
||||
//! original text, making the size depend only on the changes made.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # #[cfg(feature = "serde")]
|
||||
//! # {
|
||||
//! use reconcile_text::{EditedText, BuiltinTokenizer};
|
||||
//! use serde_yaml;
|
||||
//! use pretty_assertions::assert_eq;
|
||||
//!
|
||||
//!
|
||||
//! let original = "Merging text is hard!";
|
||||
//! let changes = "Merging text is easy with reconcile!";
|
||||
//!
|
||||
//! let result = EditedText::from_strings(
|
||||
//! original,
|
||||
//! &changes.into()
|
||||
//! );
|
||||
//!
|
||||
//! let serialized = serde_yaml::to_string(&result.to_diff().unwrap()).unwrap();
|
||||
//! assert_eq!(
|
||||
//! serialized,
|
||||
//! concat!(
|
||||
//! "- 15\n",
|
||||
//! "- -6\n",
|
||||
//! "- ' easy with reconcile!'\n"
|
||||
//! )
|
||||
//! );
|
||||
//!
|
||||
//! let deserialized = serde_yaml::from_str(&serialized).unwrap();
|
||||
//! let reconstructed = EditedText::from_diff(
|
||||
//! original,
|
||||
//! deserialized,
|
||||
//! &*BuiltinTokenizer::Word
|
||||
//! ).unwrap();
|
||||
//! assert_eq!(
|
||||
//! reconstructed.apply().text(),
|
||||
//! "Merging text is easy with reconcile!"
|
||||
//! );
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! ## Error handling
|
||||
//!
|
||||
//! The library is designed to be robust and will always produce a result, even
|
||||
//! in edge cases. However, be aware that extremely large diffs may have
|
||||
//! performance implications.
|
||||
//! for edge cases.
|
||||
//!
|
||||
//! ## Performance
|
||||
//!
|
||||
//! Be aware that extremely large diffs may have performance implications.
|
||||
//!
|
||||
//! ## Algorithm overview
|
||||
//!
|
||||
|
|
@ -169,13 +216,12 @@ mod tokenizer;
|
|||
mod types;
|
||||
mod utils;
|
||||
|
||||
pub use operation_transformation::{EditedText, reconcile};
|
||||
pub use operation_transformation::{DiffError, EditedText, reconcile};
|
||||
pub use tokenizer::{BuiltinTokenizer, Tokenizer, token::Token};
|
||||
pub use types::{
|
||||
cursor_position::CursorPosition, history::History, side::Side,
|
||||
cursor_position::CursorPosition, history::History, number_or_text::NumberOrText, side::Side,
|
||||
span_with_history::SpanWithHistory, text_with_cursors::TextWithCursors,
|
||||
};
|
||||
pub use utils::is_binary::is_binary;
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
pub mod wasm;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,14 @@
|
|||
mod diff_error;
|
||||
mod edited_text;
|
||||
mod operation;
|
||||
mod utils;
|
||||
use std::fmt::Debug;
|
||||
|
||||
pub use diff_error::DiffError;
|
||||
pub use edited_text::EditedText;
|
||||
pub use operation::Operation;
|
||||
|
||||
use crate::{
|
||||
Tokenizer,
|
||||
types::{side::Side, text_with_cursors::TextWithCursors},
|
||||
};
|
||||
use crate::{Tokenizer, types::text_with_cursors::TextWithCursors};
|
||||
|
||||
/// Given an `original` document and two concurrent edits to it,
|
||||
/// return a document containing all changes from both `left`
|
||||
|
|
@ -25,8 +24,7 @@ use crate::{
|
|||
/// into that span, the inserted text will be present in the return
|
||||
/// value.
|
||||
///
|
||||
/// The function supports UTF-8. The arguments are tokenized at the
|
||||
/// granularity of words.
|
||||
/// Supports UTF-8. Arguments are tokenized using the provided `tokenizer`.
|
||||
///
|
||||
/// ```
|
||||
/// use reconcile_text::{reconcile, BuiltinTokenizer};
|
||||
|
|
@ -48,10 +46,8 @@ pub fn reconcile<'a, T>(
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
let left_operations =
|
||||
EditedText::from_strings_with_tokenizer(original, left, tokenizer, Side::Left);
|
||||
let right_operations =
|
||||
EditedText::from_strings_with_tokenizer(original, right, tokenizer, Side::Right);
|
||||
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
|
||||
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
|
||||
|
||||
left_operations.merge(right_operations)
|
||||
}
|
||||
|
|
@ -160,7 +156,7 @@ mod test {
|
|||
.unwrap()
|
||||
.chars()
|
||||
.skip(range.start)
|
||||
.take(range.end)
|
||||
.take(range.len())
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
|
|
|||
26
src/operation_transformation/diff_error.rs
Normal file
26
src/operation_transformation/diff_error.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
use thiserror::Error;
|
||||
|
||||
/// Error type for invalid diff operations
|
||||
#[derive(Error, Debug, Clone, PartialEq)]
|
||||
pub enum DiffError {
|
||||
/// The diff references a range that exceeds the original text length
|
||||
#[error(
|
||||
"Invalid diff: attempting to access {requested} characters starting at position \
|
||||
{position}, but original text only has {available} characters remaining"
|
||||
)]
|
||||
LengthExceedsOriginal {
|
||||
/// The position where the operation starts
|
||||
position: usize,
|
||||
/// The number of characters requested
|
||||
requested: usize,
|
||||
/// The number of characters available from the position
|
||||
available: usize,
|
||||
},
|
||||
|
||||
/// A character count was too large to represent as i64
|
||||
#[error("Integer overflow: value {value} cannot be represented as i64")]
|
||||
IntegerOverflow {
|
||||
/// The value that caused the overflow
|
||||
value: usize,
|
||||
},
|
||||
}
|
||||
|
|
@ -4,29 +4,30 @@ use std::fmt::Debug;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
BuiltinTokenizer, CursorPosition, TextWithCursors,
|
||||
BuiltinTokenizer, CursorPosition, TextWithCursors, Token,
|
||||
operation_transformation::{
|
||||
Operation,
|
||||
DiffError, Operation,
|
||||
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
||||
},
|
||||
raw_operation::RawOperation,
|
||||
tokenizer::Tokenizer,
|
||||
types::{history::History, side::Side, span_with_history::SpanWithHistory},
|
||||
types::{
|
||||
history::History, number_or_text::NumberOrText, side::Side,
|
||||
span_with_history::SpanWithHistory,
|
||||
},
|
||||
utils::string_builder::StringBuilder,
|
||||
};
|
||||
|
||||
/// A text document and a sequence of operations that can be applied to the text
|
||||
/// document. `EditedText` supports merging two sequences of operations using
|
||||
/// the principles of Operational Transformation.
|
||||
/// A text document with a sequence of operations derived from diffing it
|
||||
/// against an updated version. Supports merging two `EditedText` instances
|
||||
/// (from the same original) via Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the `from_strings` method, then merged with
|
||||
/// another `EditedText` derived from the same original text and then applied to
|
||||
/// the original text to get the reconciled text of concurrent edits.
|
||||
/// Created via `from_strings`, `from_strings_with_tokenizer`, or `from_diff`,
|
||||
/// then merged with another `EditedText` and applied to get the reconciled
|
||||
/// text.
|
||||
///
|
||||
/// In addition to text and operations, it also keeps track of cursor positions
|
||||
/// in the original text. The cursor positions are updated when the operations
|
||||
/// are applied, so that the cursor positions can be used to restore the
|
||||
/// cursor positions in the updated text.
|
||||
/// Also tracks cursor positions from the updated text, repositioning them
|
||||
/// when operations are applied.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
|
|
@ -35,19 +36,16 @@ where
|
|||
{
|
||||
text: &'a str,
|
||||
operations: Vec<Operation<T>>,
|
||||
operation_sides: Vec<Side>,
|
||||
cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl<'a> EditedText<'a, String> {
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The default
|
||||
/// word tokenizer is used to tokenize the text which splits the text on
|
||||
/// whitespaces.
|
||||
/// Create an `EditedText` from the given original and updated strings.
|
||||
/// Uses the default word tokenizer (splits on word boundaries).
|
||||
#[must_use]
|
||||
pub fn from_strings(original: &'a str, updated: &TextWithCursors, side: Side) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word, side)
|
||||
pub fn from_strings(original: &'a str, updated: &TextWithCursors) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &*BuiltinTokenizer::Word)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -55,25 +53,25 @@ impl<'a, T> EditedText<'a, T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The tokenizer
|
||||
/// function is used to tokenize the text.
|
||||
/// Create an `EditedText` from the given original and updated strings
|
||||
/// using the provided tokenizer
|
||||
#[must_use]
|
||||
pub fn from_strings_with_tokenizer(
|
||||
original: &'a str,
|
||||
updated: &TextWithCursors,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
side: Side,
|
||||
) -> Self {
|
||||
let original_tokens = (tokenizer)(original);
|
||||
let updated_tokens = (tokenizer)(&updated.text());
|
||||
|
||||
let diff: Vec<RawOperation<T>> = RawOperation::vec_from(&original_tokens, &updated_tokens);
|
||||
let operations: Vec<Operation<T>> = cook_operations(elongate_operations(diff)).collect();
|
||||
let operation_count = operations.len();
|
||||
|
||||
Self::new(
|
||||
original,
|
||||
cook_operations(elongate_operations(diff), side).collect(),
|
||||
operations,
|
||||
vec![Side::Left; operation_count],
|
||||
updated.cursors(),
|
||||
)
|
||||
}
|
||||
|
|
@ -81,12 +79,18 @@ where
|
|||
/// Create a new `EditedText` with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be
|
||||
/// applied. The operations must not overlap.
|
||||
fn new(text: &'a str, operations: Vec<Operation<T>>, mut cursors: Vec<CursorPosition>) -> Self {
|
||||
fn new(
|
||||
text: &'a str,
|
||||
operations: Vec<Operation<T>>,
|
||||
operation_sides: Vec<Side>,
|
||||
mut cursors: Vec<CursorPosition>,
|
||||
) -> Self {
|
||||
cursors.sort_by_key(|cursor| cursor.char_index);
|
||||
|
||||
Self {
|
||||
text,
|
||||
operations,
|
||||
operation_sides,
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
|
|
@ -95,6 +99,11 @@ where
|
|||
/// from the same original text. The operations are merged using the
|
||||
/// principles of Operational Transformation. The cursors are updated
|
||||
/// accordingly to reflect the changes made by the merged operations.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow (in isize) when calculating new
|
||||
/// cursor positions.
|
||||
#[must_use]
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge(self, other: Self) -> Self {
|
||||
|
|
@ -109,6 +118,8 @@ where
|
|||
|
||||
let mut merged_operations: Vec<Operation<T>> =
|
||||
Vec::with_capacity(self.operations.len() + other.operations.len());
|
||||
let mut merged_operation_sides: Vec<Side> =
|
||||
Vec::with_capacity(self.operations.len() + other.operations.len());
|
||||
|
||||
let mut left_iter = self.operations.into_iter();
|
||||
let mut right_iter = other.operations.into_iter();
|
||||
|
|
@ -124,24 +135,21 @@ where
|
|||
let mut last_right_op = None;
|
||||
|
||||
loop {
|
||||
let (side, operation, mut last_other_op) =
|
||||
match (maybe_left_op.clone(), maybe_right_op.clone()) {
|
||||
(Some(left_op), Some(right_op)) => {
|
||||
if left_op
|
||||
.get_sort_key(seen_left_length)
|
||||
.partial_cmp(&right_op.get_sort_key(seen_right_length))
|
||||
== Some(std::cmp::Ordering::Less)
|
||||
{
|
||||
(Side::Left, left_op, last_right_op.clone())
|
||||
} else {
|
||||
(Side::Right, right_op, last_left_op.clone())
|
||||
}
|
||||
let (side, operation) = match (maybe_left_op.as_ref(), maybe_right_op.as_ref()) {
|
||||
(Some(left_op), Some(right_op)) => {
|
||||
if left_op.cmp_priority(seen_left_length, right_op, seen_right_length)
|
||||
== std::cmp::Ordering::Less
|
||||
{
|
||||
(Side::Left, maybe_left_op.take().unwrap())
|
||||
} else {
|
||||
(Side::Right, maybe_right_op.take().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
(Some(left_op), None) => (Side::Left, left_op, last_right_op.clone()),
|
||||
(None, Some(right_op)) => (Side::Right, right_op, last_left_op.clone()),
|
||||
(None, None) => break,
|
||||
};
|
||||
(Some(_), None) => (Side::Left, maybe_left_op.take().unwrap()),
|
||||
(None, Some(_)) => (Side::Right, maybe_right_op.take().unwrap()),
|
||||
(None, None) => break,
|
||||
};
|
||||
|
||||
let is_advancing_operation = matches!(
|
||||
operation,
|
||||
|
|
@ -149,18 +157,19 @@ where
|
|||
);
|
||||
|
||||
let original_length = operation.len();
|
||||
let result = match side {
|
||||
let (side, result) = match side {
|
||||
Side::Left => {
|
||||
let result = operation.merge_operations(&mut last_other_op);
|
||||
let result = operation.merge_operations(last_right_op.as_ref());
|
||||
|
||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||
let merged_length_signed =
|
||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
||||
let seen_left_length_signed =
|
||||
isize::try_from(seen_left_length).unwrap_or(isize::MAX);
|
||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
||||
let original_length_signed =
|
||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
||||
let merged_length_signed = isize::try_from(merged_length)
|
||||
.expect("merged_length must fit in isize");
|
||||
let seen_left_length_signed = isize::try_from(seen_left_length)
|
||||
.expect("seen_left_length must fit in isize");
|
||||
let op_len_signed =
|
||||
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||
let original_length_signed = isize::try_from(original_length)
|
||||
.expect("original_length must fit in isize");
|
||||
|
||||
let shift = merged_length_signed - seen_left_length_signed + op_len_signed
|
||||
- original_length_signed;
|
||||
|
|
@ -181,19 +190,20 @@ where
|
|||
maybe_left_op = left_iter.next();
|
||||
last_left_op = Some(result.clone());
|
||||
|
||||
result
|
||||
(Side::Left, result)
|
||||
}
|
||||
Side::Right => {
|
||||
let result = operation.merge_operations(&mut last_other_op);
|
||||
let result = operation.merge_operations(last_left_op.as_ref());
|
||||
|
||||
if let ref op @ (Operation::Insert { .. } | Operation::Equal { .. }) = result {
|
||||
let merged_length_signed =
|
||||
isize::try_from(merged_length).unwrap_or(isize::MAX);
|
||||
let seen_right_length_signed =
|
||||
isize::try_from(seen_right_length).unwrap_or(isize::MAX);
|
||||
let op_len_signed = isize::try_from(op.len()).unwrap_or(isize::MAX);
|
||||
let original_length_signed =
|
||||
isize::try_from(original_length).unwrap_or(isize::MAX);
|
||||
let merged_length_signed = isize::try_from(merged_length)
|
||||
.expect("merged_length must fit in isize");
|
||||
let seen_right_length_signed = isize::try_from(seen_right_length)
|
||||
.expect("seen_right_length must fit in isize");
|
||||
let op_len_signed =
|
||||
isize::try_from(op.len()).expect("op.len() must fit in isize");
|
||||
let original_length_signed = isize::try_from(original_length)
|
||||
.expect("original_length must fit in isize");
|
||||
|
||||
let shift = merged_length_signed - seen_right_length_signed + op_len_signed
|
||||
- original_length_signed;
|
||||
|
|
@ -214,7 +224,7 @@ where
|
|||
maybe_right_op = right_iter.next();
|
||||
last_right_op = Some(result.clone());
|
||||
|
||||
result
|
||||
(Side::Right, result)
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -227,16 +237,24 @@ where
|
|||
}
|
||||
|
||||
merged_operations.push(result);
|
||||
merged_operation_sides.push(side);
|
||||
}
|
||||
|
||||
for cursor in left_cursors.chain(right_cursors) {
|
||||
merged_cursors.push(cursor.with_index(merged_length));
|
||||
}
|
||||
|
||||
Self::new(self.text, merged_operations, merged_cursors)
|
||||
debug_assert_eq!(merged_operations.len(), merged_operation_sides.len());
|
||||
|
||||
Self::new(
|
||||
self.text,
|
||||
merged_operations,
|
||||
merged_operation_sides,
|
||||
merged_cursors,
|
||||
)
|
||||
}
|
||||
|
||||
/// Apply the operations to the text and return the resulting text.
|
||||
/// Apply the operations to the text and return the resulting text
|
||||
#[must_use]
|
||||
pub fn apply(&self) -> TextWithCursors {
|
||||
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
|
||||
|
|
@ -251,7 +269,7 @@ where
|
|||
/// Apply the operations to the text and return the resulting text in chunks
|
||||
/// together with the provenance describing where each chunk came from.
|
||||
///
|
||||
/// The result includes deleted spans as well.
|
||||
/// Returns all spans including deletions (not present in the merged text).
|
||||
///
|
||||
/// ```
|
||||
/// use reconcile_text::{History, SpanWithHistory, BuiltinTokenizer, reconcile};
|
||||
|
|
@ -284,47 +302,242 @@ where
|
|||
/// ```
|
||||
#[must_use]
|
||||
pub fn apply_with_history(&self) -> Vec<SpanWithHistory> {
|
||||
let chars: Vec<char> = self.text.chars().collect();
|
||||
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
|
||||
|
||||
let mut history = Vec::with_capacity(self.operations.len());
|
||||
|
||||
for operation in &self.operations {
|
||||
for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
|
||||
builder = operation.apply(builder);
|
||||
|
||||
match operation {
|
||||
Operation::Equal { .. } => {
|
||||
history.push(SpanWithHistory::new(builder.take(), History::Unchanged));
|
||||
}
|
||||
Operation::Insert { side, .. } => match side {
|
||||
Side::Left => {
|
||||
history.push(SpanWithHistory::new(builder.take(), History::AddedFromLeft));
|
||||
}
|
||||
Side::Right => history.push(SpanWithHistory::new(
|
||||
builder.take(),
|
||||
History::AddedFromRight,
|
||||
)),
|
||||
},
|
||||
Operation::Insert { .. } => {
|
||||
let h = match side {
|
||||
Side::Left => History::AddedFromLeft,
|
||||
Side::Right => History::AddedFromRight,
|
||||
};
|
||||
history.push(SpanWithHistory::new(builder.take(), h));
|
||||
}
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
order,
|
||||
side,
|
||||
..
|
||||
} => {
|
||||
let deleted = self.text[*order..*order + *deleted_character_count].to_string();
|
||||
match side {
|
||||
Side::Left => {
|
||||
history.push(SpanWithHistory::new(deleted, History::RemovedFromLeft));
|
||||
}
|
||||
Side::Right => {
|
||||
history.push(SpanWithHistory::new(deleted, History::RemovedFromRight));
|
||||
}
|
||||
}
|
||||
let deleted: String = chars[*order..*order + *deleted_character_count]
|
||||
.iter()
|
||||
.collect();
|
||||
let h = match side {
|
||||
Side::Left => History::RemovedFromLeft,
|
||||
Side::Right => History::RemovedFromRight,
|
||||
};
|
||||
history.push(SpanWithHistory::new(deleted, h));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
history
|
||||
}
|
||||
|
||||
/// Apply the operations and return both the merged text with cursors and
|
||||
/// the provenance history in a single pass
|
||||
#[must_use]
|
||||
pub fn apply_with_all(&self) -> (TextWithCursors, Vec<SpanWithHistory>) {
|
||||
let chars: Vec<char> = self.text.chars().collect();
|
||||
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
|
||||
let mut history = Vec::with_capacity(self.operations.len());
|
||||
let mut full_text = String::new();
|
||||
|
||||
for (operation, side) in self.operations.iter().zip(self.operation_sides.iter()) {
|
||||
builder = operation.apply(builder);
|
||||
|
||||
match operation {
|
||||
Operation::Equal { .. } => {
|
||||
let span = builder.take();
|
||||
full_text.push_str(&span);
|
||||
history.push(SpanWithHistory::new(span, History::Unchanged));
|
||||
}
|
||||
Operation::Insert { .. } => {
|
||||
let span = builder.take();
|
||||
full_text.push_str(&span);
|
||||
let h = match side {
|
||||
Side::Left => History::AddedFromLeft,
|
||||
Side::Right => History::AddedFromRight,
|
||||
};
|
||||
history.push(SpanWithHistory::new(span, h));
|
||||
}
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
order,
|
||||
..
|
||||
} => {
|
||||
let deleted: String = chars[*order..*order + *deleted_character_count]
|
||||
.iter()
|
||||
.collect();
|
||||
let h = match side {
|
||||
Side::Left => History::RemovedFromLeft,
|
||||
Side::Right => History::RemovedFromRight,
|
||||
};
|
||||
history.push(SpanWithHistory::new(deleted, h));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
TextWithCursors::new(full_text, self.cursors.clone()),
|
||||
history,
|
||||
)
|
||||
}
|
||||
|
||||
/// Convert the `EditedText` into a terse representation ready for
|
||||
/// serialization. The result omits cursor positions and the original text.
|
||||
/// This is useful for sending text diffs over the network if there's a
|
||||
/// clear consensus on the original text.
|
||||
///
|
||||
/// Inserts are strings, deletes are negative integers (character count),
|
||||
/// and retained spans are positive integers (character count).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `DiffError::IntegerOverflow` if a character count exceeds
|
||||
/// `i64::MAX`.
|
||||
pub fn to_diff(&self) -> Result<Vec<NumberOrText>, DiffError> {
|
||||
let mut result: Vec<NumberOrText> = Vec::with_capacity(self.operations.len());
|
||||
let mut previous_equal: Option<usize> = None;
|
||||
|
||||
for operation in &self.operations {
|
||||
match operation {
|
||||
Operation::Equal { length, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
previous_equal = Some(prev_length + *length);
|
||||
} else {
|
||||
previous_equal = Some(*length);
|
||||
}
|
||||
}
|
||||
|
||||
Operation::Insert { text, .. } => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result
|
||||
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|
||||
|_| DiffError::IntegerOverflow { value: prev_length },
|
||||
)?));
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
let text: String = text.iter().map(Token::original).collect();
|
||||
result.push(NumberOrText::Text(text));
|
||||
}
|
||||
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => {
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result
|
||||
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|
||||
|_| DiffError::IntegerOverflow { value: prev_length },
|
||||
)?));
|
||||
previous_equal = None;
|
||||
}
|
||||
|
||||
let count = i64::try_from(*deleted_character_count).map_err(|_| {
|
||||
DiffError::IntegerOverflow {
|
||||
value: *deleted_character_count,
|
||||
}
|
||||
})?;
|
||||
result.push(NumberOrText::Number(-count));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(prev_length) = previous_equal {
|
||||
result
|
||||
.push(NumberOrText::Number(i64::try_from(prev_length).map_err(
|
||||
|_| DiffError::IntegerOverflow { value: prev_length },
|
||||
)?));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Reconstruct an `EditedText` from a diff and the original text.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `DiffError::LengthExceedsOriginal` if the diff references a
|
||||
/// range that exceeds the original text length.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if there's an integer overflow in i64.
|
||||
pub fn from_diff(
|
||||
original_text: &'a str,
|
||||
diff: Vec<NumberOrText>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Result<EditedText<'a, T>, DiffError> {
|
||||
let mut operations: Vec<Operation<T>> = Vec::with_capacity(diff.len());
|
||||
let mut order = 0;
|
||||
let chars: Vec<char> = original_text.chars().collect();
|
||||
let text_length = chars.len();
|
||||
|
||||
for item in diff {
|
||||
match item {
|
||||
NumberOrText::Number(length) => {
|
||||
if length >= 0 {
|
||||
let length = usize::try_from(length).expect("length must fit in usize");
|
||||
|
||||
// Validate that the range doesn't exceed the original text
|
||||
if order + length > text_length {
|
||||
return Err(DiffError::LengthExceedsOriginal {
|
||||
position: order,
|
||||
requested: length,
|
||||
available: text_length.saturating_sub(order),
|
||||
});
|
||||
}
|
||||
|
||||
let original_characters: String =
|
||||
chars[order..order + length].iter().collect();
|
||||
|
||||
let original_tokens = tokenizer(&original_characters);
|
||||
for token in original_tokens {
|
||||
operations
|
||||
.push(Operation::create_equal(order, token.get_original_length()));
|
||||
order += token.get_original_length();
|
||||
}
|
||||
} else {
|
||||
let length =
|
||||
usize::try_from(-length).expect("negative length must fit in usize");
|
||||
|
||||
// Validate that the delete range doesn't exceed the original text
|
||||
if order + length > text_length {
|
||||
return Err(DiffError::LengthExceedsOriginal {
|
||||
position: order,
|
||||
requested: length,
|
||||
available: text_length.saturating_sub(order),
|
||||
});
|
||||
}
|
||||
|
||||
operations.push(Operation::create_delete(order, length));
|
||||
order += length;
|
||||
}
|
||||
}
|
||||
NumberOrText::Text(text) => {
|
||||
let tokens = tokenizer(&text);
|
||||
operations.push(Operation::create_insert(order, tokens));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let operation_count = operations.len();
|
||||
Ok(EditedText::new(
|
||||
original_text,
|
||||
operations,
|
||||
vec![Side::Left; operation_count],
|
||||
vec![],
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -339,7 +552,7 @@ mod tests {
|
|||
let left = "hello world! How are you? Adam";
|
||||
let right = "Hello, my friend! How are you doing? Albert";
|
||||
|
||||
let operations = EditedText::from_strings(left, &right.into(), Side::Right);
|
||||
let operations = EditedText::from_strings(left, &right.into());
|
||||
|
||||
insta::assert_debug_snapshot!(operations);
|
||||
|
||||
|
|
@ -351,7 +564,7 @@ mod tests {
|
|||
fn test_calculate_operations_with_no_diff() {
|
||||
let text = "hello world!";
|
||||
|
||||
let operations = EditedText::from_strings(text, &text.into(), Side::Right);
|
||||
let operations = EditedText::from_strings(text, &text.into());
|
||||
|
||||
assert_debug_snapshot!(operations);
|
||||
|
||||
|
|
@ -366,10 +579,98 @@ mod tests {
|
|||
let right = "Hello world! How are you?";
|
||||
let expected = "Hello world! How are you? I'm Andras.";
|
||||
|
||||
let operations_1 = EditedText::from_strings(original, &left.into(), Side::Left);
|
||||
let operations_2 = EditedText::from_strings(original, &right.into(), Side::Right);
|
||||
let operations_1 = EditedText::from_strings(original, &left.into());
|
||||
let operations_2 = EditedText::from_strings(original, &right.into());
|
||||
|
||||
let operations = operations_1.merge(operations_2);
|
||||
assert_eq!(operations.apply().text(), expected);
|
||||
}
|
||||
|
||||
/// A retained span longer than the original text must be rejected with the
/// offending position, requested length and remaining length.
#[test]
fn test_from_diff_length_exceeds_original() {
    let outcome = EditedText::from_diff(
        "hello",
        vec![
            10.into(), // too large equal span - should error
            " world".into(),
        ],
        &*BuiltinTokenizer::Word,
    );

    assert!(outcome.is_err());
    match outcome {
        Err(DiffError::LengthExceedsOriginal {
            position,
            requested,
            available,
        }) => {
            assert_eq!(position, 0);
            assert_eq!(requested, 10);
            assert_eq!(available, 5);
        }
        _ => panic!("Expected LengthExceedsOriginal error"),
    }
}
|
||||
|
||||
/// Retaining exactly the whole original and appending text round-trips
/// through `from_diff` and `apply`.
#[test]
fn test_from_diff_valid() {
    let merged = EditedText::from_diff(
        "hello",
        vec![
            5.into(), // exact length
            " world".into(),
        ],
        &*BuiltinTokenizer::Word,
    )
    .unwrap()
    .apply()
    .text();

    assert_eq!(merged, "hello world");
}
|
||||
|
||||
/// The terse diff serializes to YAML as a retained count, a negative
/// delete count and the inserted text.
#[cfg(feature = "serde")]
#[test]
fn test_changes_deserialisation() {
    let original = "Merging text is hard!";
    let changes = "Merging text is easy with reconcile!";

    let edited = EditedText::from_strings(original, &changes.into());
    let diff = edited.to_diff().unwrap();
    let serialized = serde_yaml::to_string(&diff).unwrap();

    assert_eq!(serialized, "- 15\n- -6\n- ' easy with reconcile!'\n");
}
|
||||
|
||||
/// History generation must work on multi-byte text, where character
/// offsets and byte offsets differ.
#[test]
fn test_apply_with_history_utf8() {
    let parent = "こんにちは世界"; // "Hello World" in Japanese (7 chars, 21 bytes)
    let left = "こんにちは宇宙"; // Changed 世界 to 宇宙
    let right = parent; // the right side leaves the text untouched

    let tokenizer = &*BuiltinTokenizer::Word;
    let merged = crate::reconcile(parent, &left.into(), &right.into(), tokenizer);

    let spans = merged.apply_with_history();
    assert!(!spans.is_empty());
    assert_eq!(merged.apply().text(), "こんにちは宇宙");
}
|
||||
|
||||
/// A diff produced by `to_diff` can be fed back into `from_diff` and
/// applied to recover the updated text.
#[cfg(feature = "serde")]
#[test]
fn test_changes_serialization() {
    let original = "The quick brown fox jumps over the lazy dog.";
    let updated = "The quick red fox jumped over the very lazy dog!";

    let changes = EditedText::from_strings(original, &updated.into())
        .to_diff()
        .unwrap();

    let round_tripped =
        EditedText::from_diff(original, changes, &*BuiltinTokenizer::Word).unwrap();

    assert_eq!(round_tripped.apply().text(), updated);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,14 +4,14 @@ use core::fmt::{Debug, Display};
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
Side, Token,
|
||||
Token,
|
||||
utils::{
|
||||
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
|
||||
string_builder::StringBuilder,
|
||||
},
|
||||
};
|
||||
|
||||
/// Represents a change that can be applied on a `StringBuilder`.
|
||||
/// Represents a change that can be applied on a `StringBuilder`
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Operation<T>
|
||||
|
|
@ -23,23 +23,21 @@ where
|
|||
length: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[cfg_attr(feature = "serde", serde(skip_serializing))]
|
||||
text: Option<String>,
|
||||
},
|
||||
|
||||
Insert {
|
||||
side: Side,
|
||||
|
||||
order: usize,
|
||||
text: Vec<Token<T>>,
|
||||
},
|
||||
|
||||
Delete {
|
||||
side: Side,
|
||||
|
||||
order: usize,
|
||||
deleted_character_count: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[cfg_attr(feature = "serde", serde(skip_serializing))]
|
||||
deleted_text: Option<String>,
|
||||
},
|
||||
}
|
||||
|
|
@ -48,9 +46,8 @@ impl<T> Operation<T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
/// Creates an equal operation with the given index.
|
||||
/// This operation is used to indicate that the text at the given index
|
||||
/// is unchanged.
|
||||
/// Creates an equal (retain) operation starting at the given character
|
||||
/// offset in the original text
|
||||
pub fn create_equal(order: usize, length: usize) -> Self {
|
||||
Operation::Equal {
|
||||
order,
|
||||
|
|
@ -71,16 +68,16 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Creates an insert operation with the given index and text.
|
||||
pub fn create_insert(order: usize, text: Vec<Token<T>>, side: Side) -> Self {
|
||||
Operation::Insert { side, order, text }
|
||||
/// Creates an insert operation at the given character offset with the
|
||||
/// given tokens
|
||||
pub fn create_insert(order: usize, text: Vec<Token<T>>) -> Self {
|
||||
Operation::Insert { order, text }
|
||||
}
|
||||
|
||||
/// Creates a delete operation with the given index and number of
|
||||
/// to-be-deleted characters.
|
||||
pub fn create_delete(order: usize, deleted_character_count: usize, side: Side) -> Self {
|
||||
/// Creates a delete operation at the given character offset for the
|
||||
/// specified number of characters
|
||||
pub fn create_delete(order: usize, deleted_character_count: usize) -> Self {
|
||||
Operation::Delete {
|
||||
side,
|
||||
order,
|
||||
deleted_character_count,
|
||||
|
||||
|
|
@ -89,9 +86,8 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub fn create_delete_with_text(order: usize, text: String, side: Side) -> Self {
|
||||
pub fn create_delete_with_text(order: usize, text: String) -> Self {
|
||||
Operation::Delete {
|
||||
side,
|
||||
order,
|
||||
deleted_character_count: text.chars().count(),
|
||||
|
||||
|
|
@ -108,28 +104,55 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub fn get_sort_key(&self, insertion_index: usize) -> (usize, usize, usize, String) {
|
||||
(
|
||||
self.order(),
|
||||
match self {
|
||||
Operation::Delete { .. } => 1,
|
||||
Operation::Insert { .. } => 2,
|
||||
Operation::Equal { .. } => 3,
|
||||
},
|
||||
insertion_index,
|
||||
// Make sure that the ordering is deterministic regardless of which text
|
||||
// is left or right.
|
||||
match self {
|
||||
Operation::Equal { length, .. } => length.to_string(),
|
||||
Operation::Insert { text, .. } => {
|
||||
text.iter().map(Token::original).collect::<String>()
|
||||
}
|
||||
fn type_priority(&self) -> u8 {
|
||||
match self {
|
||||
Operation::Delete { .. } => 1,
|
||||
Operation::Insert { .. } => 2,
|
||||
Operation::Equal { .. } => 3,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare two operations for processing order during merging. Uses
|
||||
/// (order, type, `insertion_index`) with a deterministic content
|
||||
/// tiebreaker that avoids allocating.
|
||||
pub fn cmp_priority(
|
||||
&self,
|
||||
self_index: usize,
|
||||
other: &Self,
|
||||
other_index: usize,
|
||||
) -> std::cmp::Ordering {
|
||||
self.order()
|
||||
.cmp(&other.order())
|
||||
.then_with(|| self.type_priority().cmp(&other.type_priority()))
|
||||
.then_with(|| self_index.cmp(&other_index))
|
||||
.then_with(|| self.deterministic_content_cmp(other))
|
||||
}
|
||||
|
||||
/// Deterministic tiebreaker based on operation content, so that merge
|
||||
/// results are identical regardless of which side is left vs right
|
||||
fn deterministic_content_cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
match (self, other) {
|
||||
(Operation::Insert { text: t1, .. }, Operation::Insert { text: t2, .. }) => {
|
||||
let s1 = t1.iter().flat_map(|t| t.original().chars());
|
||||
let s2 = t2.iter().flat_map(|t| t.original().chars());
|
||||
s1.cmp(s2)
|
||||
}
|
||||
(Operation::Equal { length: l1, .. }, Operation::Equal { length: l2, .. }) => {
|
||||
l1.cmp(l2)
|
||||
}
|
||||
(
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
deleted_character_count: c1,
|
||||
..
|
||||
} => deleted_character_count.to_string(),
|
||||
},
|
||||
)
|
||||
},
|
||||
Operation::Delete {
|
||||
deleted_character_count: c2,
|
||||
..
|
||||
},
|
||||
) => c1.cmp(c2),
|
||||
// Different types are already ordered by type_priority
|
||||
_ => std::cmp::Ordering::Equal,
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies the operation to the given `StringBuilder`, returning the
|
||||
|
|
@ -183,8 +206,8 @@ where
|
|||
builder
|
||||
}
|
||||
|
||||
/// Returns the number of affected characters. It is always greater than 0
|
||||
/// because empty operations cannot be created.
|
||||
/// Returns the number of affected characters. May be 0 after
|
||||
/// `merge_operations`.
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { length, .. } => *length,
|
||||
|
|
@ -196,17 +219,15 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Merges the operation with the given context, producing a new operation
|
||||
/// and updating the context. This implements a comples FSM that handles
|
||||
/// the merging of operations in a way that is consistent with the text.
|
||||
/// The contexts are updated in-place.
|
||||
/// Adjusts this operation based on `previous_operation` from the other side
|
||||
/// to avoid duplicating or conflicting changes
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge_operations(self, previous_operation: &mut Option<Self>) -> Operation<T> {
|
||||
pub fn merge_operations(self, previous_operation: Option<&Self>) -> Operation<T> {
|
||||
let operation = self;
|
||||
|
||||
match (operation, previous_operation) {
|
||||
(
|
||||
Operation::Insert { side, order, text },
|
||||
Operation::Insert { order, text },
|
||||
Some(Operation::Insert {
|
||||
text: previous_inserted_text,
|
||||
..
|
||||
|
|
@ -218,12 +239,11 @@ where
|
|||
let offset_in_tokens =
|
||||
find_longest_prefix_contained_within(previous_inserted_text, &text);
|
||||
|
||||
Operation::create_insert(order, text[offset_in_tokens..].to_vec(), side)
|
||||
Operation::create_insert(order, text[offset_in_tokens..].to_vec())
|
||||
}
|
||||
|
||||
(
|
||||
Operation::Delete {
|
||||
side,
|
||||
order,
|
||||
deleted_character_count,
|
||||
|
||||
|
|
@ -247,20 +267,19 @@ where
|
|||
|
||||
#[cfg(debug_assertions)]
|
||||
let updated_delete = deleted_text.as_ref().map_or_else(
|
||||
|| Operation::create_delete(order + overlap, new_length, side),
|
||||
|| Operation::create_delete(order + overlap, new_length),
|
||||
|text| {
|
||||
Operation::create_delete_with_text(
|
||||
order + overlap,
|
||||
text.chars()
|
||||
.skip(deleted_character_count - new_length)
|
||||
.collect::<String>(),
|
||||
side,
|
||||
)
|
||||
},
|
||||
);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
let updated_delete = Operation::create_delete(order + overlap, new_length, side);
|
||||
let updated_delete = Operation::create_delete(order + overlap, new_length);
|
||||
|
||||
updated_delete
|
||||
}
|
||||
|
|
@ -302,14 +321,37 @@ where
|
|||
}
|
||||
|
||||
(
|
||||
ref operation @ Operation::Equal { ref order, .. },
|
||||
ref operation @ Operation::Equal {
|
||||
ref order,
|
||||
#[cfg(debug_assertions)]
|
||||
ref text,
|
||||
..
|
||||
},
|
||||
Some(Operation::Equal {
|
||||
order: last_equal_order,
|
||||
length: last_equal_length,
|
||||
#[cfg(debug_assertions)]
|
||||
text: last_equal_text,
|
||||
..
|
||||
}),
|
||||
) => {
|
||||
if operation.len() == *last_equal_length && *order == *last_equal_order {
|
||||
// Both sides retained the same span from the original text,
|
||||
// so we deduplicate by zeroing one out. This is safe because
|
||||
// both EditedTexts are derived from the same original, and
|
||||
// matching (order, length) means they cover the same substring
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert_eq!(
|
||||
text,
|
||||
last_equal_text,
|
||||
"Equal operations with same order and length should have the same text, \
|
||||
but got {operation:?} vs {:?}",
|
||||
Operation::<T>::Equal {
|
||||
order: *last_equal_order,
|
||||
length: *last_equal_length,
|
||||
text: last_equal_text.clone(),
|
||||
},
|
||||
);
|
||||
Operation::create_equal(*order, 0)
|
||||
} else {
|
||||
operation.clone()
|
||||
|
|
@ -336,18 +378,20 @@ where
|
|||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<equal {} from {order}>",
|
||||
text.as_ref()
|
||||
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
|
||||
.unwrap_or(format!("{length} characters")),
|
||||
)?;
|
||||
{
|
||||
write!(
|
||||
f,
|
||||
"<equal {} from {order}>",
|
||||
text.as_ref()
|
||||
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
|
||||
.unwrap_or(format!("{length} characters")),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(f, "<equal {length} from {order}>")?;
|
||||
|
||||
Ok(())
|
||||
{
|
||||
write!(f, "<equal {length} from {order}>")
|
||||
}
|
||||
}
|
||||
Operation::Insert { order, text, .. } => {
|
||||
write!(
|
||||
|
|
@ -368,22 +412,24 @@ where
|
|||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<delete {} from {order}>",
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
|
||||
.unwrap_or(format!("{deleted_character_count} characters")),
|
||||
)?;
|
||||
{
|
||||
write!(
|
||||
f,
|
||||
"<delete {} from {order}>",
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.map(|text| format!("'{}'", text.replace('\n', "\\n")))
|
||||
.unwrap_or(format!("{deleted_character_count} characters")),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(
|
||||
f,
|
||||
"<delete {deleted_character_count} characters from {order}>",
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
{
|
||||
write!(
|
||||
f,
|
||||
"<delete {deleted_character_count} characters from {order}>",
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -393,7 +439,9 @@ impl<T> Debug for Operation<T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
write!(f, "{self}")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -405,8 +453,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_delete_with_create() {
|
||||
let builder = StringBuilder::new("hello world");
|
||||
let delete_operation =
|
||||
Operation::<()>::create_delete_with_text(0, "hello ".to_owned(), Side::Left);
|
||||
let delete_operation = Operation::<()>::create_delete_with_text(0, "hello ".to_owned());
|
||||
let retain_operation = Operation::<()>::create_equal(6, 5);
|
||||
|
||||
let mut builder = delete_operation.apply(builder);
|
||||
|
|
@ -420,7 +467,7 @@ mod tests {
|
|||
let builder = StringBuilder::new("hello");
|
||||
|
||||
let retain_operation = Operation::<()>::create_equal(0, 5);
|
||||
let insert_operation = Operation::create_insert(5, vec![" my friend".into()], Side::Right);
|
||||
let insert_operation = Operation::create_insert(5, vec![" my friend".into()]);
|
||||
|
||||
let mut builder = retain_operation.apply(builder);
|
||||
builder = insert_operation.apply(builder);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
---
|
||||
source: src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world! How are you? Adam",
|
||||
|
|
@ -15,5 +14,15 @@ EditedText {
|
|||
<delete ' you? Adam' from 20>,
|
||||
<insert ' you doing? Albert' at 31>,
|
||||
],
|
||||
operation_sides: [
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
---
|
||||
source: src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world!",
|
||||
|
|
@ -10,5 +9,10 @@ EditedText {
|
|||
<equal ' ' from 5>,
|
||||
<equal 'world!' from 6>,
|
||||
],
|
||||
operation_sides: [
|
||||
Left,
|
||||
Left,
|
||||
Left,
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
use std::fmt::Debug;
|
||||
|
||||
use crate::{operation_transformation::Operation, raw_operation::RawOperation, types::side::Side};
|
||||
use crate::{operation_transformation::Operation, raw_operation::RawOperation};
|
||||
|
||||
/// Turn raw operations into ordered operations while keeping track of the
|
||||
/// original token's indexes.
|
||||
pub fn cook_operations<I, T>(raw_operations: I, side: Side) -> impl Iterator<Item = Operation<T>>
|
||||
/// original token's indexes
|
||||
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = Operation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Clone + Debug,
|
||||
|
|
@ -29,18 +29,15 @@ where
|
|||
|
||||
op
|
||||
}
|
||||
RawOperation::Insert(tokens) => {
|
||||
Operation::create_insert(original_text_index, tokens, side)
|
||||
}
|
||||
RawOperation::Insert(tokens) => Operation::create_insert(original_text_index, tokens),
|
||||
RawOperation::Delete(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_delete_with_text(
|
||||
original_text_index,
|
||||
raw_operation.get_original_text(),
|
||||
side,
|
||||
)
|
||||
} else {
|
||||
Operation::create_delete(original_text_index, length, side)
|
||||
Operation::create_delete(original_text_index, length)
|
||||
};
|
||||
|
||||
original_text_index += length;
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ use std::fmt::Debug;
|
|||
|
||||
use crate::{tokenizer::token::Token, utils::myers_diff::myers_diff};
|
||||
|
||||
/// Text editing operation containing the to-be-changed `Tokens`-s.
|
||||
/// Text editing operation containing the affected tokens.
|
||||
///
|
||||
/// `RawOperations` can be joined together when the underlying tokens
|
||||
/// `RawOperation`s can be joined together when the underlying tokens
|
||||
/// allow for joining subsequent operations.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation<T>
|
||||
|
|
@ -20,9 +20,11 @@ impl<T> RawOperation<T>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> { myers_diff(left, right) }
|
||||
pub fn vec_from(left: &[Token<T>], right: &[Token<T>]) -> Vec<Self> {
|
||||
myers_diff(left, right)
|
||||
}
|
||||
|
||||
pub fn tokens(&self) -> &Vec<Token<T>> {
|
||||
pub fn tokens(&self) -> &[Token<T>] {
|
||||
match self {
|
||||
RawOperation::Insert(tokens)
|
||||
| RawOperation::Delete(tokens)
|
||||
|
|
@ -34,7 +36,9 @@ where
|
|||
self.tokens().iter().map(Token::get_original_length).sum()
|
||||
}
|
||||
|
||||
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
|
||||
pub fn get_original_text(&self) -> String {
|
||||
self.tokens().iter().map(Token::original).collect()
|
||||
}
|
||||
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
mod character_tokenizer;
|
||||
mod line_tokenizer;
|
||||
mod markdown_tokenizer;
|
||||
mod word_tokenizer;
|
||||
|
||||
use std::ops::Deref;
|
||||
|
|
@ -12,7 +13,7 @@ use wasm_bindgen::prelude::*;
|
|||
|
||||
pub mod token;
|
||||
|
||||
/// A trait for tokenizers that take a string and return a list of tokens.
|
||||
/// Type alias for tokenizer functions that split a string into tokens
|
||||
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
|
|
@ -22,6 +23,7 @@ pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
|||
pub enum BuiltinTokenizer {
|
||||
Character = "Character",
|
||||
Line = "Line",
|
||||
Markdown = "Markdown",
|
||||
Word = "Word",
|
||||
}
|
||||
|
||||
|
|
@ -31,6 +33,7 @@ pub enum BuiltinTokenizer {
|
|||
pub enum BuiltinTokenizer {
|
||||
Character,
|
||||
Line,
|
||||
Markdown,
|
||||
Word,
|
||||
}
|
||||
|
||||
|
|
@ -41,6 +44,7 @@ impl Deref for BuiltinTokenizer {
|
|||
match self {
|
||||
BuiltinTokenizer::Character => &character_tokenizer::character_tokenizer,
|
||||
BuiltinTokenizer::Line => &line_tokenizer::line_tokenizer,
|
||||
BuiltinTokenizer::Markdown => &markdown_tokenizer::markdown_tokenizer,
|
||||
BuiltinTokenizer::Word => &word_tokenizer::word_tokenizer,
|
||||
#[cfg(feature = "wasm")]
|
||||
BuiltinTokenizer::__Invalid => panic!("Unexpected tokenizer type"),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits text into UTF-8 characters.
|
||||
/// Splits text into UTF-8 characters
|
||||
///
|
||||
/// ```not_rust
|
||||
/// "Hey!" -> ["H", "e", "y", "!"]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits text into lines, preserving line endings as separate tokens.
|
||||
/// Splits text into lines, preserving line endings as separate tokens
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
|
|
@ -22,14 +22,20 @@ pub fn line_tokenizer(text: &str) -> Vec<Token<String>> {
|
|||
// Add newline
|
||||
result.push("\n".into());
|
||||
line_start = i + 1;
|
||||
} else if c == '\r' && chars.peek() == Some(&(i + 1, '\n')) {
|
||||
// Handle \r\n
|
||||
} else if c == '\r' {
|
||||
if i > line_start {
|
||||
result.push(text[line_start..i].into());
|
||||
}
|
||||
chars.next(); // consume \n
|
||||
result.push("\r\n".into());
|
||||
line_start = i + 2;
|
||||
if chars.peek() == Some(&(i + 1, '\n')) {
|
||||
// Handle \r\n
|
||||
chars.next(); // consume \n
|
||||
result.push("\r\n".into());
|
||||
line_start = i + 2;
|
||||
} else {
|
||||
// Handle bare \r
|
||||
result.push("\r".into());
|
||||
line_start = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -66,5 +72,9 @@ mod tests {
|
|||
assert_debug_snapshot!(line_tokenizer("\n\n"));
|
||||
|
||||
assert_debug_snapshot!(line_tokenizer("Start\n\nEnd"));
|
||||
|
||||
assert_debug_snapshot!(line_tokenizer("Old\rMac\rStyle"));
|
||||
|
||||
assert_debug_snapshot!(line_tokenizer("Mixed\r\nand\rbare"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
290
src/tokenizer/markdown_tokenizer.rs
Normal file
290
src/tokenizer/markdown_tokenizer.rs
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
use super::{token::Token, word_tokenizer::split_words};
|
||||
|
||||
/// Splits markdown text into tokens that respect markdown formatting structure
|
||||
///
|
||||
/// Builds on word-level tokenization with markdown-specific handling:
|
||||
/// - Newlines are non-joinable tokens (preserves block structure)
|
||||
/// - Block-level prefixes (headings, list markers, blockquotes) attach to the
|
||||
/// first word of their line so they can't be split apart during merge
|
||||
/// - Intra-line whitespace uses the same normalization as the word tokenizer
|
||||
///
|
||||
/// This prevents merges from breaking lists, headings, or other structural
|
||||
/// markdown elements. Inline formatting like `**bold**` is already preserved
|
||||
/// by word-level splitting since formatting markers contain no whitespace.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```not_rust
|
||||
/// "# Hello\n- item" -> ["# Hello", "\n", "- item"]
|
||||
/// ```
|
||||
pub fn markdown_tokenizer(text: &str) -> Vec<Token<String>> {
|
||||
let mut result = Vec::new();
|
||||
let segments = split_preserving_newlines(text);
|
||||
|
||||
for segment in &segments {
|
||||
if *segment == "\n" || *segment == "\r\n" {
|
||||
let s = (*segment).to_owned();
|
||||
result.push(Token::new(s.clone(), s, false, false));
|
||||
continue;
|
||||
}
|
||||
|
||||
let prefix_len = block_prefix_len(segment);
|
||||
let mut line_tokens = split_words(&segment[prefix_len..]);
|
||||
|
||||
if prefix_len > 0 {
|
||||
let prefix = &segment[..prefix_len];
|
||||
if line_tokens.is_empty() {
|
||||
let s = prefix.to_owned();
|
||||
result.push(Token::new(s.clone(), s, false, false));
|
||||
} else {
|
||||
let first = &line_tokens[0];
|
||||
let combined_original = format!("{prefix}{}", first.original());
|
||||
let combined_normalized = format!("{prefix}{}", first.normalized());
|
||||
line_tokens[0] = Token::new(
|
||||
combined_normalized,
|
||||
combined_original,
|
||||
false,
|
||||
first.is_right_joinable,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
result.extend(line_tokens);
|
||||
}
|
||||
|
||||
// Normalize non-newline whitespace tokens by appending the next token's
|
||||
// original text (same trick as the word tokenizer so each space is unique
|
||||
// in the diff based on what follows it)
|
||||
if !result.is_empty() {
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i]
|
||||
.original()
|
||||
.chars()
|
||||
.all(|c| c.is_whitespace() && c != '\n' && c != '\r')
|
||||
{
|
||||
let normalized = result[i].normalized().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalized(normalized);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Cuts `text` into line-content slices and newline-separator slices, in
/// order of appearance
///
/// `\n` and `\r\n` are emitted as their own segments; empty line content is
/// never emitted. A `\r` that is not followed by `\n` is not a separator and
/// stays inside the surrounding line content.
fn split_preserving_newlines(text: &str) -> Vec<&str> {
    let bytes = text.as_bytes();
    let mut segments = Vec::new();
    let mut content_start = 0;
    let mut cursor = 0;

    while let Some(&byte) = bytes.get(cursor) {
        // Separators are ASCII, so scanning bytes never splits a UTF-8
        // sequence at a slice boundary
        let separator_len = match byte {
            b'\r' if bytes.get(cursor + 1) == Some(&b'\n') => 2,
            b'\n' => 1,
            _ => {
                cursor += 1;
                continue;
            }
        };

        if content_start < cursor {
            segments.push(&text[content_start..cursor]);
        }

        let separator_end = cursor + separator_len;
        segments.push(&text[cursor..separator_end]);
        cursor = separator_end;
        content_start = separator_end;
    }

    // Trailing content after the last separator
    if content_start < text.len() {
        segments.push(&text[content_start..]);
    }

    segments
}
|
||||
|
||||
/// Returns the byte length of a markdown block-level prefix at the start of a
|
||||
/// line, or 0 if none is found
|
||||
///
|
||||
/// All recognized prefix characters are ASCII, so byte offsets are always
|
||||
/// valid UTF-8 boundaries.
|
||||
///
|
||||
/// Recognized prefixes:
|
||||
/// - ATX headings: `# ` through `###### `
|
||||
/// - Blockquotes: `> ` (single level)
|
||||
/// - Unordered lists: `- `, `* `, `+ ` (with optional leading whitespace)
|
||||
/// - Ordered lists: `1. `, `2) ` etc (with optional leading whitespace)
|
||||
/// - Task lists: `- [ ] `, `- [x] `, `- [X] ` etc (checkbox included in prefix)
|
||||
fn block_prefix_len(line: &str) -> usize {
|
||||
let trimmed = line.trim_start_matches([' ', '\t']);
|
||||
let indent_len = line.len() - trimmed.len();
|
||||
|
||||
// ATX heading: #{1,6} followed by a space
|
||||
if trimmed.starts_with('#') {
|
||||
let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
|
||||
if hash_count <= 6 && trimmed.as_bytes().get(hash_count) == Some(&b' ') {
|
||||
return indent_len + hash_count + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Blockquote: > followed by optional space
|
||||
if trimmed.starts_with("> ") {
|
||||
return indent_len + 2;
|
||||
}
|
||||
if trimmed.starts_with('>') && (trimmed.len() == 1 || trimmed.as_bytes()[1] == b'>') {
|
||||
return indent_len + 1;
|
||||
}
|
||||
|
||||
// Unordered list: [-*+] followed by a space, optionally with task checkbox
|
||||
if trimmed.len() >= 2 {
|
||||
let first_byte = trimmed.as_bytes()[0];
|
||||
if matches!(first_byte, b'-' | b'*' | b'+') && trimmed.as_bytes()[1] == b' ' {
|
||||
return indent_len + 2 + task_checkbox_len(&line[indent_len + 2..]);
|
||||
}
|
||||
}
|
||||
|
||||
// Ordered list: digits followed by [.)] and a space, optionally with task
|
||||
// checkbox
|
||||
let digit_count = trimmed.bytes().take_while(u8::is_ascii_digit).count();
|
||||
if digit_count > 0 && indent_len + digit_count + 2 <= line.len() {
|
||||
let after_digits = trimmed.as_bytes()[digit_count];
|
||||
let after_marker = trimmed.as_bytes().get(digit_count + 1);
|
||||
if matches!(after_digits, b'.' | b')') && after_marker == Some(&b' ') {
|
||||
return indent_len
|
||||
+ digit_count
|
||||
+ 2
|
||||
+ task_checkbox_len(&line[indent_len + digit_count + 2..]);
|
||||
}
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the byte length of a task list checkbox (`[ ] `, `[x] `, `[X] `)
/// at the start of `rest`, or 0 if none is found
///
/// The trailing space is part of the checkbox: `"[x]"` with nothing after it,
/// or `"[x]text"`, is not recognized.
fn task_checkbox_len(rest: &str) -> usize {
    match rest.as_bytes() {
        [b'[', b' ' | b'x' | b'X', b']', b' ', ..] => 4,
        _ => 0,
    }
}
|
||||
|
||||
// Snapshot tests for `markdown_tokenizer`. Each test feeds one markdown
// fragment through the tokenizer and compares the `Debug` output of the
// resulting token list against the stored insta snapshot.
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;

    use super::*;

    #[test]
    fn test_plain_text() {
        assert_debug_snapshot!(markdown_tokenizer("Hello world"));
    }

    #[test]
    fn test_empty() {
        assert_debug_snapshot!(markdown_tokenizer(""));
    }

    // Three snapshots in one test; keep their order stable.
    #[test]
    fn test_headings() {
        assert_debug_snapshot!(markdown_tokenizer("# Hello world"));
        assert_debug_snapshot!(markdown_tokenizer("## Sub heading"));
        assert_debug_snapshot!(markdown_tokenizer("###### Deep heading"));
    }

    #[test]
    fn test_unordered_list() {
        assert_debug_snapshot!(markdown_tokenizer("- item one\n- item two\n- item three"));
    }

    #[test]
    fn test_ordered_list() {
        assert_debug_snapshot!(markdown_tokenizer("1. first\n2. second\n3. third"));
    }

    #[test]
    fn test_blockquote() {
        assert_debug_snapshot!(markdown_tokenizer("> quoted text\n> more quoted"));
    }

    // Inline markers (`**`, `*`) are not block prefixes and stay attached to
    // their words.
    #[test]
    fn test_inline_formatting() {
        assert_debug_snapshot!(markdown_tokenizer("Some **bold** and *italic* text"));
    }

    #[test]
    fn test_mixed_content() {
        assert_debug_snapshot!(markdown_tokenizer(
            "# Title\n\nSome text with **bold**.\n\n- list item\n- another item"
        ));
    }

    // List markers preceded by leading whitespace.
    #[test]
    fn test_indented_list() {
        assert_debug_snapshot!(markdown_tokenizer(" - nested item\n - deeper"));
    }

    #[test]
    fn test_crlf() {
        assert_debug_snapshot!(markdown_tokenizer("Line 1\r\nLine 2"));
    }

    #[test]
    fn test_code_fence() {
        assert_debug_snapshot!(markdown_tokenizer("```rust\nlet x = 1;\n```"));
    }

    // A heading marker with no content after it.
    #[test]
    fn test_heading_only() {
        assert_debug_snapshot!(markdown_tokenizer("# "));
    }

    #[test]
    fn test_link() {
        assert_debug_snapshot!(markdown_tokenizer("Click [here](https://example.com) now"));
    }

    #[test]
    fn test_multiline_paragraph() {
        assert_debug_snapshot!(markdown_tokenizer(
            "First line\nSecond line\n\nNew paragraph"
        ));
    }

    #[test]
    fn test_list_with_star_marker() {
        assert_debug_snapshot!(markdown_tokenizer("* item one\n* item two"));
    }

    // `**` is not followed by a space, so it must not be taken for a `* `
    // list marker.
    #[test]
    fn test_bold_not_confused_with_list() {
        assert_debug_snapshot!(markdown_tokenizer("**bold text**"));
    }

    #[test]
    fn test_task_list() {
        assert_debug_snapshot!(markdown_tokenizer(
            "- [ ] todo\n- [x] done\n- [X] also done"
        ));
    }

    #[test]
    fn test_ordered_task_list() {
        assert_debug_snapshot!(markdown_tokenizer("1. [ ] first task\n2. [x] second task"));
    }

    // Multi-byte characters directly after each kind of block prefix.
    #[test]
    fn test_unicode() {
        assert_debug_snapshot!(markdown_tokenizer(
            "# \u{1F600} Héllo\n- \u{00E9}lément\n> \u{4F60}\u{597D} world"
        ));
    }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: src/tokenizer/line_tokenizer.rs
|
||||
expression: "line_tokenizer(\"Old\\rMac\\rStyle\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Old",
|
||||
original: "Old",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\r",
|
||||
original: "\r",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "Mac",
|
||||
original: "Mac",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\r",
|
||||
original: "\r",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "Style",
|
||||
original: "Style",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: src/tokenizer/line_tokenizer.rs
|
||||
expression: "line_tokenizer(\"Mixed\\r\\nand\\rbare\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Mixed",
|
||||
original: "Mixed",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\r\n",
|
||||
original: "\r\n",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "and",
|
||||
original: "and",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\r",
|
||||
original: "\r",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "bare",
|
||||
original: "bare",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"> quoted text\\n> more quoted\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "> quoted",
|
||||
original: "> quoted",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " text",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "text",
|
||||
original: "text",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "> more",
|
||||
original: "> more",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " quoted",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "quoted",
|
||||
original: "quoted",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"**bold text**\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "**bold",
|
||||
original: "**bold",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " text**",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "text**",
|
||||
original: "text**",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"```rust\\nlet x = 1;\\n```\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "```rust",
|
||||
original: "```rust",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "let",
|
||||
original: "let",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " x",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " =",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "=",
|
||||
original: "=",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " 1;",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "1;",
|
||||
original: "1;",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "```",
|
||||
original: "```",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"Line 1\\r\\nLine 2\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Line",
|
||||
original: "Line",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " 1",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "1",
|
||||
original: "1",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\r\n",
|
||||
original: "\r\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "Line",
|
||||
original: "Line",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " 2",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "2",
|
||||
original: "2",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"\")"
|
||||
---
|
||||
[]
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"# \")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "# ",
|
||||
original: "# ",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"## Sub heading\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "## Sub",
|
||||
original: "## Sub",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " heading",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "heading",
|
||||
original: "heading",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"###### Deep heading\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "###### Deep",
|
||||
original: "###### Deep",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " heading",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "heading",
|
||||
original: "heading",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"# Hello world\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "# Hello",
|
||||
original: "# Hello",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " world",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "world",
|
||||
original: "world",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\" - nested item\\n - deeper\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: " - nested",
|
||||
original: " - nested",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " item",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "item",
|
||||
original: "item",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: " - deeper",
|
||||
original: " - deeper",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"Some **bold** and *italic* text\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Some",
|
||||
original: "Some",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " **bold**",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "**bold**",
|
||||
original: "**bold**",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " and",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "and",
|
||||
original: "and",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " *italic*",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "*italic*",
|
||||
original: "*italic*",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " text",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "text",
|
||||
original: "text",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"Click [here](https://example.com) now\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Click",
|
||||
original: "Click",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " [here](https://example.com)",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "[here](https://example.com)",
|
||||
original: "[here](https://example.com)",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " now",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "now",
|
||||
original: "now",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"* item one\\n* item two\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "* item",
|
||||
original: "* item",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " one",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "one",
|
||||
original: "one",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "* item",
|
||||
original: "* item",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " two",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "two",
|
||||
original: "two",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"# Title\\n\\nSome text with **bold**.\\n\\n- list item\\n- another item\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "# Title",
|
||||
original: "# Title",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "Some",
|
||||
original: "Some",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " text",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "text",
|
||||
original: "text",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " with",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "with",
|
||||
original: "with",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " **bold**.",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "**bold**.",
|
||||
original: "**bold**.",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- list",
|
||||
original: "- list",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " item",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "item",
|
||||
original: "item",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- another",
|
||||
original: "- another",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " item",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "item",
|
||||
original: "item",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"First line\\nSecond line\\n\\nNew paragraph\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "First",
|
||||
original: "First",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " line",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "line",
|
||||
original: "line",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "Second",
|
||||
original: "Second",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " line",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "line",
|
||||
original: "line",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "New",
|
||||
original: "New",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " paragraph",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "paragraph",
|
||||
original: "paragraph",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"1. first\\n2. second\\n3. third\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "1. first",
|
||||
original: "1. first",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "2. second",
|
||||
original: "2. second",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "3. third",
|
||||
original: "3. third",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"1. [ ] first task\\n2. [x] second task\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "1. [ ] first",
|
||||
original: "1. [ ] first",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " task",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "task",
|
||||
original: "task",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "2. [x] second",
|
||||
original: "2. [x] second",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " task",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "task",
|
||||
original: "task",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"Hello world\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "Hello",
|
||||
original: "Hello",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " world",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "world",
|
||||
original: "world",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"- [ ] todo\\n- [x] done\\n- [X] also done\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "- [ ] todo",
|
||||
original: "- [ ] todo",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- [x] done",
|
||||
original: "- [x] done",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- [X] also",
|
||||
original: "- [X] also",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " done",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "done",
|
||||
original: "done",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"# \\u{1F600} Héllo\\n- \\u{00E9}lément\\n> \\u{4F60}\\u{597D} world\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "# 😀",
|
||||
original: "# 😀",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " Héllo",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "Héllo",
|
||||
original: "Héllo",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- élément",
|
||||
original: "- élément",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "> 你好",
|
||||
original: "> 你好",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " world",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "world",
|
||||
original: "world",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
---
|
||||
source: src/tokenizer/markdown_tokenizer.rs
|
||||
expression: "markdown_tokenizer(\"- item one\\n- item two\\n- item three\")"
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalized: "- item",
|
||||
original: "- item",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " one",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "one",
|
||||
original: "one",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- item",
|
||||
original: "- item",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " two",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "two",
|
||||
original: "two",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "\n",
|
||||
original: "\n",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: false,
|
||||
},
|
||||
Token {
|
||||
normalized: "- item",
|
||||
original: "- item",
|
||||
is_left_joinable: false,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: " three",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalized: "three",
|
||||
original: "three",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -1,38 +1,41 @@
|
|||
use std::fmt::Debug;
|
||||
use std::{
|
||||
fmt::Debug,
|
||||
hash::{Hash, Hasher},
|
||||
};
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A token is a string that has been normalized in some way.
|
||||
/// A token with a normalized form (used for diffing) and an original form
|
||||
/// (used when applying operations). Joinability flags control whether
|
||||
/// adjacent insertions interleave or group.
|
||||
///
|
||||
/// A token consists of the normalized form is used for comparison, and the
|
||||
/// original form used for subsequently applying `Operation`-s to a text
|
||||
/// document.
|
||||
///
|
||||
/// It's UTF-8 compatible.
|
||||
/// UTF-8 compatible.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
/// The normalized form of the token used deriving the diff.
|
||||
/// The normalized form of the token, used for deriving the diff
|
||||
normalized: T,
|
||||
|
||||
/// The original string, that should be inserted or deleted in the document.
|
||||
/// The original string that should be inserted or deleted in the document
|
||||
original: String,
|
||||
|
||||
/// Whether the token is semantically joinable with the previous token.
|
||||
/// Whether the token is semantically joinable with the previous token
|
||||
pub is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is semantically joinable with the next token.
|
||||
/// Whether the token is semantically joinable with the next token
|
||||
pub is_right_joinable: bool,
|
||||
}
|
||||
|
||||
/// Trivial implementation of Token when the normalized form is the same as the
|
||||
/// original string.
|
||||
/// original string
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
fn from(text: &str) -> Self {
|
||||
Token::new(text.to_owned(), text.to_owned(), true, true)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
|
|
@ -53,18 +56,39 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub fn original(&self) -> &str { &self.original }
|
||||
pub fn original(&self) -> &str {
|
||||
&self.original
|
||||
}
|
||||
|
||||
pub fn set_normalized(&mut self, normalized: T) { self.normalized = normalized; }
|
||||
pub fn set_normalized(&mut self, normalized: T) {
|
||||
self.normalized = normalized;
|
||||
}
|
||||
|
||||
pub fn normalized(&self) -> &T { &self.normalized }
|
||||
pub fn normalized(&self) -> &T {
|
||||
&self.normalized
|
||||
}
|
||||
|
||||
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
pub fn get_original_length(&self) -> usize {
|
||||
self.original.chars().count()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool { self.normalized == other.normalized }
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.normalized == other.normalized
|
||||
}
|
||||
}
|
||||
|
||||
/// Hashes based on the `normalized` field only, consistent with the
|
||||
/// [`PartialEq`] implementation.
|
||||
impl<T> Hash for Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + Debug + Hash,
|
||||
{
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.normalized.hash(state);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits text on word boundaries, creating tokens of alternating words and
|
||||
/// whitespace with the whitespace getting unique IDs.
|
||||
/// whitespace with the whitespace getting unique IDs
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
|
|
@ -9,6 +9,26 @@ use super::token::Token;
|
|||
/// "Hi there!" -> ["Hi", " ", "there!"]
|
||||
/// ```
|
||||
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
||||
let mut result = split_words(text);
|
||||
|
||||
if result.is_empty() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// normalize whitespace tokens by concatenating with the following token
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
let normalized = result[i].normalized().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalized(normalized);
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Splits text into alternating word and whitespace tokens without any
|
||||
/// normalization. Shared by `word_tokenizer` and `markdown_tokenizer`.
|
||||
pub(super) fn split_words(text: &str) -> Vec<Token<String>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
let mut previous_boundary_index = 0;
|
||||
|
|
@ -28,18 +48,6 @@ pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
|||
result.push(text[previous_boundary_index..].into());
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// normalize whitespace tokens by concatenating with the following token
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
let normalized = result[i].normalized().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalized(normalized);
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
pub mod cursor_position;
|
||||
pub mod history;
|
||||
pub mod number_or_text;
|
||||
pub mod side;
|
||||
pub mod span_with_history;
|
||||
pub mod text_with_cursors;
|
||||
|
|
|
|||
|
|
@ -4,21 +4,23 @@ use serde::{Deserialize, Serialize};
|
|||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// `CursorPosition` represents the position of an identifiable cursor in a text
|
||||
/// document based on its (UTF-8) character index.
|
||||
/// document based on its (UTF-8) character index
|
||||
#[allow(clippy::unsafe_derive_deserialize)]
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct CursorPosition {
|
||||
pub id: usize,
|
||||
pub char_index: usize,
|
||||
pub(crate) id: usize,
|
||||
pub(crate) char_index: usize,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
impl CursorPosition {
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
|
||||
#[must_use]
|
||||
pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }
|
||||
pub fn new(id: usize, char_index: usize) -> Self {
|
||||
Self { id, char_index }
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_index(&self, index: usize) -> Self {
|
||||
|
|
@ -29,9 +31,13 @@ impl CursorPosition {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn id(&self) -> usize { self.id }
|
||||
pub fn id(&self) -> usize {
|
||||
self.id
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen(js_name = characterIndex))]
|
||||
#[must_use]
|
||||
pub fn char_index(&self) -> usize { self.char_index }
|
||||
pub fn char_index(&self) -> usize {
|
||||
self.char_index
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ pub enum History {
|
|||
RemovedFromRight = "RemovedFromRight",
|
||||
}
|
||||
|
||||
/// Simple enum for describing the result of `reconcile` in a flat list.
|
||||
/// When compiled to WASM, the enum values are the same as their names.
|
||||
/// Provenance label for each span returned by `apply_with_history`
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[cfg(not(feature = "wasm"))]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
|
|
|||
119
src/types/number_or_text.rs
Normal file
119
src/types/number_or_text.rs
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
use std::{borrow::Cow, fmt::Debug};
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[cfg(feature = "wasm")]
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
#[allow(clippy::cast_precision_loss)]
|
||||
const INTEGRAL_LIMIT: f64 = (1u64 << 53) as f64;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[cfg_attr(feature = "serde", serde(untagged))]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum NumberOrText {
|
||||
Number(i64),
|
||||
Text(String),
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl TryFrom<JsValue> for NumberOrText {
|
||||
type Error = DeserialisationError;
|
||||
|
||||
fn try_from(value: JsValue) -> Result<Self, Self::Error> {
|
||||
if let Ok(num) = value.clone().try_into() {
|
||||
return Ok(NumberOrText::Number(num));
|
||||
}
|
||||
|
||||
if let Some(num) = value.clone().as_f64() {
|
||||
if num.is_nan() {
|
||||
return Err(DeserialisationError::new("NaN is not a valid number"));
|
||||
}
|
||||
|
||||
if num.abs() > INTEGRAL_LIMIT {
|
||||
return Err(DeserialisationError::new(
|
||||
"Floating-point number exceeds safe integer limit, use BigInt instead",
|
||||
));
|
||||
}
|
||||
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
return Ok(NumberOrText::Number(num.round() as i64));
|
||||
}
|
||||
|
||||
if let Ok(text) = value.try_into() {
|
||||
return Ok(NumberOrText::Text(text));
|
||||
}
|
||||
|
||||
Err(DeserialisationError::new(
|
||||
"Could not parse JsValue as either number or string",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl From<NumberOrText> for JsValue {
|
||||
fn from(value: NumberOrText) -> Self {
|
||||
match value {
|
||||
NumberOrText::Number(num) => JsValue::from(num),
|
||||
NumberOrText::Text(text) => JsValue::from(text),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for NumberOrText {
|
||||
fn from(value: i64) -> Self {
|
||||
NumberOrText::Number(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for NumberOrText {
|
||||
fn from(value: String) -> Self {
|
||||
NumberOrText::Text(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for NumberOrText {
|
||||
fn from(value: &str) -> Self {
|
||||
NumberOrText::Text(value.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Cow<'a, str>> for NumberOrText {
|
||||
fn from(value: Cow<'a, str>) -> Self {
|
||||
NumberOrText::Text(value.into_owned())
|
||||
}
|
||||
}
|
||||
|
||||
/// Error type for deserialisation failures
|
||||
#[cfg(feature = "wasm")]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DeserialisationError {
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl DeserialisationError {
|
||||
pub fn new(message: impl Into<String>) -> Self {
|
||||
Self {
|
||||
message: message.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl std::fmt::Display for DeserialisationError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Deserialisation error: {}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl std::error::Error for DeserialisationError {}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
impl From<DeserialisationError> for JsValue {
|
||||
fn from(error: DeserialisationError) -> Self {
|
||||
JsValue::from_str(&error.message)
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,7 @@ use std::fmt::Display;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Pretty-printable flag to tell which conflicting edit (side)
|
||||
/// an operation is associated with.
|
||||
/// an operation is associated with
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Side {
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@ use wasm_bindgen::prelude::*;
|
|||
|
||||
use crate::types::history::History;
|
||||
|
||||
/// Wrapper type for `(String, History)` where History describes the origin of
|
||||
/// `text`.
|
||||
/// A text span annotated with its origin in a merge result
|
||||
#[allow(clippy::unsafe_derive_deserialize)]
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
|
|
@ -19,11 +18,17 @@ pub struct SpanWithHistory {
|
|||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
impl SpanWithHistory {
|
||||
#[must_use]
|
||||
pub fn new(text: String, history: History) -> Self { SpanWithHistory { text, history } }
|
||||
pub fn new(text: String, history: History) -> Self {
|
||||
SpanWithHistory { text, history }
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn history(&self) -> History { self.history }
|
||||
pub fn history(&self) -> History {
|
||||
self.history
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn text(&self) -> String { self.text.clone() }
|
||||
pub fn text(&self) -> String {
|
||||
self.text.clone()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,12 +12,15 @@ pub struct TextWithCursors {
|
|||
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen)]
|
||||
impl TextWithCursors {
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if any cursor's `char_index` exceeds the text's character length.
|
||||
#[cfg_attr(feature = "wasm", wasm_bindgen(constructor))]
|
||||
#[must_use]
|
||||
pub fn new(text: String, cursors: Vec<CursorPosition>) -> Self {
|
||||
let length = text.chars().count();
|
||||
for cursor in &cursors {
|
||||
debug_assert!(
|
||||
assert!(
|
||||
cursor.char_index <= length,
|
||||
// cursor.char_index == length means that the cursor is at the end
|
||||
"Cursor positions ({}) must be contained within the text (of length {length}) or \
|
||||
|
|
@ -30,10 +33,21 @@ impl TextWithCursors {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn text(&self) -> String { self.text.to_string() }
|
||||
pub fn text(&self) -> String {
|
||||
self.text.clone()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn cursors(&self) -> Vec<CursorPosition> { self.cursors.clone() }
|
||||
pub fn cursors(&self) -> Vec<CursorPosition> {
|
||||
self.cursors.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl TextWithCursors {
|
||||
#[must_use]
|
||||
pub fn text_ref(&self) -> &str {
|
||||
&self.text
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for TextWithCursors {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
pub mod common_prefix_len;
|
||||
pub mod common_suffix_len;
|
||||
pub mod find_longest_prefix_contained_within;
|
||||
pub mod is_binary;
|
||||
pub mod myers_diff;
|
||||
pub mod string_builder;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::fmt::Debug;
|
|||
use crate::Token;
|
||||
|
||||
/// Given two lists of tokens, returns `length` where the `old` list
|
||||
/// somewhere within contains the `length` prefix of the `new` list.
|
||||
/// somewhere within contains the `length` prefix of the `new` list
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
/// Heuristically determine if the given data is a binary or a text file's
|
||||
/// content.
|
||||
///
|
||||
/// Only text inputs can be reconciled using the crate's functions.
|
||||
#[must_use]
|
||||
pub fn is_binary(data: &[u8]) -> bool {
|
||||
if data.contains(&0) {
|
||||
// Even though the NUL character is valid in UTF-8, it's highly suspicious in
|
||||
// human-readable text.
|
||||
return true;
|
||||
}
|
||||
|
||||
std::str::from_utf8(data).is_err()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_is_binary() {
|
||||
assert!(is_binary(&[0, 159, 146, 150]));
|
||||
assert!(is_binary(&[0, 12]));
|
||||
assert!(!is_binary(b"hello"));
|
||||
}
|
||||
}
|
||||
|
|
@ -11,13 +11,11 @@
|
|||
//! The implementation of this algorithm is based on the implementation by
|
||||
//! Brandon Williams.
|
||||
//!
|
||||
//! # Heuristics
|
||||
//! # Complexity
|
||||
//!
|
||||
//! At present this implementation of Myers' does not implement any more
|
||||
//! advanced heuristics that would solve some pathological cases. For instance
|
||||
//! passing two large and completely distinct sequences to the algorithm will
|
||||
//! make it spin without making reasonable progress.
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
//! The worst case (completely dissimilar inputs) is `O((N+M)²)` time. In
|
||||
//! practice the divide-and-conquer strategy with prefix/suffix stripping keeps
|
||||
//! subproblems small for typical text.
|
||||
|
||||
use std::{
|
||||
fmt::Debug,
|
||||
|
|
@ -41,26 +39,21 @@ pub fn myers_diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
|
|||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
let max_d = (old.len() + new.len()).div_ceil(2) + 1;
|
||||
let mut vb = V::new(max_d);
|
||||
let mut vf = V::new(max_d);
|
||||
let mut result = Vec::new();
|
||||
let max_edit_distance = (old.len() + new.len()).div_ceil(2) + 1;
|
||||
let mut backward_endpoints = FurthestEndpoints::new(max_edit_distance);
|
||||
let mut forward_endpoints = FurthestEndpoints::new(max_edit_distance);
|
||||
let mut result = Vec::with_capacity(old.len() + new.len());
|
||||
|
||||
conquer(
|
||||
old,
|
||||
0..old.len(),
|
||||
new,
|
||||
0..new.len(),
|
||||
&mut vf,
|
||||
&mut vb,
|
||||
&mut forward_endpoints,
|
||||
&mut backward_endpoints,
|
||||
&mut result,
|
||||
);
|
||||
|
||||
debug_assert!(
|
||||
result.iter().all(|op| op.tokens().len() == 1),
|
||||
"All operations must be of length 1"
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
|
|
@ -68,49 +61,52 @@ where
|
|||
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
|
||||
// and then a possibly empty sequence of diagonal edges called a snake.
|
||||
|
||||
/// `V` contains the endpoints of the furthest reaching `D-paths`. For each
|
||||
/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x`
|
||||
/// because `y` can be computed from `x - k`. In other words, `V` is an array of
|
||||
/// integers where `V[k]` contains the row index of the endpoint of the furthest
|
||||
/// reaching path in diagonal `k`.
|
||||
/// Contains the endpoints of the furthest reaching `D-paths`. For each
|
||||
/// recorded endpoint `(x, y)` on diagonal `k`, we only need to retain `x`
|
||||
/// because `y` can be computed from `x - k`. In other words, this is an array
|
||||
/// of integers where `endpoints[k]` contains the row index of the endpoint of
|
||||
/// the furthest reaching path on diagonal `k`.
|
||||
///
|
||||
/// We can't use a traditional Vec to represent `V` since we use `k` as an index
|
||||
/// and it can take on negative values. So instead `V` is represented as a
|
||||
/// light-weight wrapper around a Vec plus an `offset` which is the maximum
|
||||
/// value `k` can take on in order to map negative `k`'s back to a value >= 0.
|
||||
/// We can't use a traditional Vec since we use `k` as an index and it can take
|
||||
/// on negative values. So instead this is a light-weight wrapper around a Vec
|
||||
/// plus an `offset` which is the maximum value `k` can take on, used to map
|
||||
/// negative `k`'s back to a value >= 0.
|
||||
#[derive(Debug)]
|
||||
struct V {
|
||||
struct FurthestEndpoints {
|
||||
offset: isize,
|
||||
v: Vec<usize>,
|
||||
endpoints: Vec<usize>,
|
||||
}
|
||||
|
||||
impl V {
|
||||
fn new(max_d: usize) -> Self {
|
||||
// max_d should fit in isize for the algorithm to work correctly
|
||||
let offset = isize::try_from(max_d).unwrap_or(isize::MAX);
|
||||
impl FurthestEndpoints {
|
||||
fn new(max_edit_distance: usize) -> Self {
|
||||
let offset =
|
||||
isize::try_from(max_edit_distance).expect("max_edit_distance must fit in isize");
|
||||
Self {
|
||||
offset,
|
||||
v: vec![0; 2 * max_d],
|
||||
endpoints: vec![0; 2 * max_edit_distance + 1],
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize { self.v.len() }
|
||||
}
|
||||
|
||||
impl Index<isize> for V {
|
||||
type Output = usize;
|
||||
|
||||
fn index(&self, index: isize) -> &Self::Output {
|
||||
let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
|
||||
&self.v[idx.min(self.v.len().saturating_sub(1))]
|
||||
fn len(&self) -> usize {
|
||||
self.endpoints.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<isize> for V {
|
||||
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
|
||||
let idx = usize::try_from(index + self.offset).unwrap_or(usize::MAX);
|
||||
let len = self.v.len();
|
||||
&mut self.v[idx.min(len.saturating_sub(1))]
|
||||
impl Index<isize> for FurthestEndpoints {
|
||||
type Output = usize;
|
||||
|
||||
fn index(&self, diagonal: isize) -> &Self::Output {
|
||||
let idx =
|
||||
usize::try_from(diagonal + self.offset).expect("diagonal + offset must fit in usize");
|
||||
&self.endpoints[idx]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<isize> for FurthestEndpoints {
|
||||
fn index_mut(&mut self, diagonal: isize) -> &mut Self::Output {
|
||||
let idx =
|
||||
usize::try_from(diagonal + self.offset).expect("diagonal + offset must fit in usize");
|
||||
&mut self.endpoints[idx]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -118,6 +114,26 @@ fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
|
|||
(range.start..at, at..range.end)
|
||||
}
|
||||
|
||||
/// Adjust a lower diagonal bound so it has the same parity as `edit_distance`.
|
||||
/// Diagonals are visited in steps of 2, so `lower` must share `edit_distance`'s
|
||||
/// parity.
|
||||
fn align_lower_bound(lower: isize, edit_distance: isize) -> isize {
|
||||
if (lower & 1) == (edit_distance & 1) {
|
||||
lower
|
||||
} else {
|
||||
lower + 1
|
||||
}
|
||||
}
|
||||
|
||||
/// Adjust an upper diagonal bound so it has the same parity as `edit_distance`.
|
||||
fn align_upper_bound(upper: isize, edit_distance: isize) -> isize {
|
||||
if (upper & 1) == (edit_distance & 1) {
|
||||
upper
|
||||
} else {
|
||||
upper - 1
|
||||
}
|
||||
}
|
||||
|
||||
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
|
||||
/// a snake has a start and end point (and it is possible for a snake to have
|
||||
/// a length of zero, meaning the start and end points are the same) however
|
||||
|
|
@ -134,103 +150,143 @@ fn find_middle_snake<T>(
|
|||
old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
forward_endpoints: &mut FurthestEndpoints,
|
||||
backward_endpoints: &mut FurthestEndpoints,
|
||||
) -> Option<(usize, usize)>
|
||||
where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
let old_len = old_range.len();
|
||||
let new_len = new_range.len();
|
||||
|
||||
let old_len_signed = isize::try_from(old_len).expect("old_len must fit in isize");
|
||||
let new_len_signed = isize::try_from(new_len).expect("new_len must fit in isize");
|
||||
|
||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||
// `delta` is odd or even.
|
||||
let delta = isize::try_from(n).unwrap_or(isize::MAX) - isize::try_from(m).unwrap_or(isize::MAX);
|
||||
let odd = delta & 1 == 1;
|
||||
let delta = old_len_signed - new_len_signed;
|
||||
let delta_is_odd = delta & 1 == 1;
|
||||
|
||||
// The initial point at (0, -1)
|
||||
vf[1] = 0;
|
||||
forward_endpoints[1] = 0;
|
||||
// The initial point at (N, M+1)
|
||||
vb[1] = 0;
|
||||
backward_endpoints[1] = 0;
|
||||
|
||||
let d_max = (n + m).div_ceil(2) + 1;
|
||||
assert!(vf.len() >= d_max);
|
||||
assert!(vb.len() >= d_max);
|
||||
let max_edit_distance = (old_len + new_len).div_ceil(2) + 1;
|
||||
assert!(forward_endpoints.len() >= max_edit_distance);
|
||||
assert!(backward_endpoints.len() >= max_edit_distance);
|
||||
|
||||
let max_edit_distance_signed =
|
||||
isize::try_from(max_edit_distance).expect("max_edit_distance must fit in isize");
|
||||
|
||||
for edit_distance in 0..max_edit_distance_signed {
|
||||
// Tighter diagonal bounds: on diagonal k = x - y the constraints
|
||||
// 0 <= x <= old_len and 0 <= y <= new_len give k in [-new_len, old_len].
|
||||
// Intersect with the algorithm's [-edit_distance, edit_distance]
|
||||
// range and snap to the correct parity (k advances in steps of 2).
|
||||
let forward_diagonal_lo =
|
||||
align_lower_bound((-edit_distance).max(-new_len_signed), edit_distance);
|
||||
let forward_diagonal_hi =
|
||||
align_upper_bound(edit_distance.min(old_len_signed), edit_distance);
|
||||
|
||||
let d_max_isize = isize::try_from(d_max).unwrap_or(isize::MAX);
|
||||
for d in 0..d_max_isize {
|
||||
// Forward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||
vf[k + 1]
|
||||
for diagonal in (forward_diagonal_lo..=forward_diagonal_hi).rev().step_by(2) {
|
||||
let mut old_idx = if diagonal == -edit_distance
|
||||
|| (diagonal != edit_distance
|
||||
&& forward_endpoints[diagonal - 1] < forward_endpoints[diagonal + 1])
|
||||
{
|
||||
forward_endpoints[diagonal + 1]
|
||||
} else {
|
||||
vf[k - 1] + 1
|
||||
forward_endpoints[diagonal - 1] + 1
|
||||
};
|
||||
let y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
|
||||
let new_idx = usize::try_from(
|
||||
isize::try_from(old_idx).expect("old_idx must fit in isize") - diagonal,
|
||||
)
|
||||
.expect("old_idx - diagonal must be non-negative and fit in usize");
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
let (x0, y0) = (x, y);
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if x < old_range.len() && y < new_range.len() {
|
||||
let (snake_start_old, snake_start_new) = (old_idx, new_idx);
|
||||
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if old_idx < old_range.len() && new_idx < new_range.len() {
|
||||
let advance = common_prefix_len(
|
||||
old,
|
||||
old_range.start + x..old_range.end,
|
||||
old_range.start + old_idx..old_range.end,
|
||||
new,
|
||||
new_range.start + y..new_range.end,
|
||||
new_range.start + new_idx..new_range.end,
|
||||
);
|
||||
x += advance;
|
||||
old_idx += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vf[k] = x;
|
||||
forward_endpoints[diagonal] = old_idx;
|
||||
|
||||
// Only check for connections from the forward search when N - M is
|
||||
// odd and when there is a reciprocal k line coming from the other
|
||||
// direction.
|
||||
if odd && (k - delta).abs() <= (d - 1) {
|
||||
// TODO optimise this so we don't have to compare against n
|
||||
if vf[k] + vb[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((x0 + old_range.start, y0 + new_range.start));
|
||||
}
|
||||
// direction. Forward diagonal k maps to backward diagonal
|
||||
// (delta - k). Overlap occurs when the combined forward + backward
|
||||
// reach covers the full width:
|
||||
// forward_endpoints[k] + backward_endpoints[delta - k] >= old_len.
|
||||
if delta_is_odd
|
||||
&& (diagonal - delta).abs() <= (edit_distance - 1)
|
||||
&& forward_endpoints[diagonal] + backward_endpoints[-(diagonal - delta)] >= old_len
|
||||
{
|
||||
return Some((
|
||||
snake_start_old + old_range.start,
|
||||
snake_start_new + new_range.start,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Backward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
|
||||
vb[k + 1]
|
||||
} else {
|
||||
vb[k - 1] + 1
|
||||
};
|
||||
let mut y = usize::try_from(isize::try_from(x).unwrap_or(isize::MAX) - k).unwrap_or(0);
|
||||
let backward_diagonal_lo =
|
||||
align_lower_bound((-edit_distance).max(-new_len_signed), edit_distance);
|
||||
let backward_diagonal_hi =
|
||||
align_upper_bound(edit_distance.min(old_len_signed), edit_distance);
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
if x < n && y < m {
|
||||
// Backward path
|
||||
for diagonal in (backward_diagonal_lo..=backward_diagonal_hi)
|
||||
.rev()
|
||||
.step_by(2)
|
||||
{
|
||||
let mut old_idx = if diagonal == -edit_distance
|
||||
|| (diagonal != edit_distance
|
||||
&& backward_endpoints[diagonal - 1] < backward_endpoints[diagonal + 1])
|
||||
{
|
||||
backward_endpoints[diagonal + 1]
|
||||
} else {
|
||||
backward_endpoints[diagonal - 1] + 1
|
||||
};
|
||||
let mut new_idx = usize::try_from(
|
||||
isize::try_from(old_idx).expect("old_idx must fit in isize") - diagonal,
|
||||
)
|
||||
.expect("old_idx - diagonal must be non-negative and fit in usize");
|
||||
|
||||
// Extend the snake backward (matching suffix)
|
||||
if old_idx < old_len && new_idx < new_len {
|
||||
let advance = common_suffix_len(
|
||||
old,
|
||||
old_range.start..old_range.start + n - x,
|
||||
old_range.start..old_range.start + old_len - old_idx,
|
||||
new,
|
||||
new_range.start..new_range.start + m - y,
|
||||
new_range.start..new_range.start + new_len - new_idx,
|
||||
);
|
||||
x += advance;
|
||||
y += advance;
|
||||
old_idx += advance;
|
||||
new_idx += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vb[k] = x;
|
||||
backward_endpoints[diagonal] = old_idx;
|
||||
|
||||
if !odd && (k - delta).abs() <= d {
|
||||
// TODO optimise this so we don't have to compare against n
|
||||
if vb[k] + vf[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((n - x + old_range.start, m - y + new_range.start));
|
||||
}
|
||||
if !delta_is_odd
|
||||
&& (diagonal - delta).abs() <= edit_distance
|
||||
&& backward_endpoints[diagonal] + forward_endpoints[-(diagonal - delta)] >= old_len
|
||||
{
|
||||
return Some((
|
||||
old_len - old_idx + old_range.start,
|
||||
new_len - new_idx + new_range.start,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe there's an opportunity to optimise and bail early?
|
||||
}
|
||||
|
||||
None
|
||||
|
|
@ -241,54 +297,72 @@ fn conquer<T>(
|
|||
mut old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
forward_endpoints: &mut FurthestEndpoints,
|
||||
backward_endpoints: &mut FurthestEndpoints,
|
||||
result: &mut Vec<RawOperation<T>>,
|
||||
) where
|
||||
T: PartialEq + Clone + Debug,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
let prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if prefix_len > 0 {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + common_prefix_len]
|
||||
old[old_range.start..old_range.start + prefix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
old_range.start += common_prefix_len;
|
||||
new_range.start += common_prefix_len;
|
||||
old_range.start += prefix_len;
|
||||
new_range.start += prefix_len;
|
||||
|
||||
// Check for common suffix
|
||||
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let common_suffix = (
|
||||
old_range.end - common_suffix_len,
|
||||
new_range.end - common_suffix_len,
|
||||
);
|
||||
old_range.end -= common_suffix_len;
|
||||
new_range.end -= common_suffix_len;
|
||||
let suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let suffix_start = old_range.end - suffix_len;
|
||||
old_range.end -= suffix_len;
|
||||
new_range.end -= suffix_len;
|
||||
|
||||
if old_range.is_empty() && new_range.is_empty() {
|
||||
// do nothing
|
||||
} else if new_range.is_empty() {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + old_range.len()]
|
||||
old[old_range.start..old_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Delete(vec![token.clone()])),
|
||||
);
|
||||
} else if old_range.is_empty() {
|
||||
result.extend(
|
||||
new[new_range.start..new_range.start + new_range.len()]
|
||||
new[new_range.start..new_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Insert(vec![token.clone()])),
|
||||
);
|
||||
} else if let Some((x_start, y_start)) =
|
||||
find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb)
|
||||
{
|
||||
let (old_a, old_b) = split_at(old_range, x_start);
|
||||
let (new_a, new_b) = split_at(new_range, y_start);
|
||||
conquer(old, old_a, new, new_a, vf, vb, result);
|
||||
conquer(old, old_b, new, new_b, vf, vb, result);
|
||||
} else if let Some((split_old, split_new)) = find_middle_snake(
|
||||
old,
|
||||
old_range.clone(),
|
||||
new,
|
||||
new_range.clone(),
|
||||
forward_endpoints,
|
||||
backward_endpoints,
|
||||
) {
|
||||
let (old_before, old_after) = split_at(old_range, split_old);
|
||||
let (new_before, new_after) = split_at(new_range, split_new);
|
||||
conquer(
|
||||
old,
|
||||
old_before,
|
||||
new,
|
||||
new_before,
|
||||
forward_endpoints,
|
||||
backward_endpoints,
|
||||
result,
|
||||
);
|
||||
conquer(
|
||||
old,
|
||||
old_after,
|
||||
new,
|
||||
new_after,
|
||||
forward_endpoints,
|
||||
backward_endpoints,
|
||||
result,
|
||||
);
|
||||
} else {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.end]
|
||||
|
|
@ -302,9 +376,9 @@ fn conquer<T>(
|
|||
);
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
if suffix_len > 0 {
|
||||
result.extend(
|
||||
old[common_suffix.0..common_suffix.0 + common_suffix_len]
|
||||
old[suffix_start..suffix_start + suffix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,21 +1,32 @@
|
|||
use std::iter::Iterator;
|
||||
use std::{fmt, str::Chars};
|
||||
|
||||
/// A helper for building a string in-order based on an original string and a
|
||||
/// series of insertions, deletions, and copies applied to it. It is safe to use
|
||||
/// with UTF-8 strings as all operations are based on character indices. The
|
||||
/// methods must be called in-order.
|
||||
/// A helper for building a string sequentially from an original string via
|
||||
/// insertions, deletions, and copies. All operations use character counts,
|
||||
/// safe for UTF-8. Methods must be called in-order.
|
||||
pub struct StringBuilder<'a> {
|
||||
original: Box<dyn Iterator<Item = char> + 'a>,
|
||||
original: Chars<'a>,
|
||||
buffer: String,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
remaining: String,
|
||||
}
|
||||
|
||||
impl fmt::Debug for StringBuilder<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut debug_struct = f.debug_struct("StringBuilder");
|
||||
debug_struct.field("buffer", &self.buffer);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
debug_struct.field("remaining", &self.remaining);
|
||||
|
||||
debug_struct.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
impl StringBuilder<'_> {
|
||||
pub fn new(original: &str) -> StringBuilder<'_> {
|
||||
StringBuilder {
|
||||
original: Box::new(original.chars()),
|
||||
original: original.chars(),
|
||||
buffer: String::with_capacity(original.len()),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
|
|
@ -23,11 +34,13 @@ impl StringBuilder<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Insert a string at the end of the built buffer.
|
||||
pub fn insert(&mut self, text: &str) { self.buffer.push_str(text); }
|
||||
/// Insert a string at the end of the built buffer
|
||||
pub fn insert(&mut self, text: &str) {
|
||||
self.buffer.push_str(text);
|
||||
}
|
||||
|
||||
/// Skip copying `length` characters from the original string to the built
|
||||
/// buffer.
|
||||
/// buffer
|
||||
pub fn delete(&mut self, length: usize) {
|
||||
if length == 0 {
|
||||
return;
|
||||
|
|
@ -41,7 +54,7 @@ impl StringBuilder<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Copy `length` characters from the original string to the built buffer.
|
||||
/// Copy `length` characters from the original string to the built buffer
|
||||
pub fn retain(&mut self, length: usize) {
|
||||
self.buffer.extend(self.original.by_ref().take(length));
|
||||
|
||||
|
|
@ -53,7 +66,9 @@ impl StringBuilder<'_> {
|
|||
|
||||
/// Returns the currently built buffer and clears it to allow consuming
|
||||
/// the result incrementally.
|
||||
pub fn take(&mut self) -> String { std::mem::take(&mut self.buffer) }
|
||||
pub fn take(&mut self) -> String {
|
||||
std::mem::take(&mut self.buffer)
|
||||
}
|
||||
|
||||
/// Get a slice of the remaining original string. The slice starts from
|
||||
/// where the next delete/retain operation would start and is of length
|
||||
|
|
|
|||
139
src/wasm.rs
139
src/wasm.rs
|
|
@ -3,12 +3,9 @@ use core::str;
|
|||
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
use crate::{BuiltinTokenizer, CursorPosition, SpanWithHistory, TextWithCursors};
|
||||
use crate::{BuiltinTokenizer, CursorPosition, EditedText, SpanWithHistory, TextWithCursors};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: wee_alloc::WeeAlloc<'_> = wee_alloc::WeeAlloc::INIT;
|
||||
|
||||
/// WASM wrapper around `crate::reconcile` for merging text.
|
||||
/// WASM wrapper around `crate::reconcile` for merging text
|
||||
#[wasm_bindgen(js_name = reconcile)]
|
||||
#[must_use]
|
||||
pub fn reconcile(
|
||||
|
|
@ -22,7 +19,7 @@ pub fn reconcile(
|
|||
crate::reconcile(parent, left, right, &*tokenizer).apply()
|
||||
}
|
||||
|
||||
/// WASM wrapper around `crate::reconcile` for merging text.
|
||||
/// WASM wrapper around `crate::reconcile` that also returns provenance history
|
||||
#[wasm_bindgen(js_name = reconcileWithHistory)]
|
||||
#[must_use]
|
||||
pub fn reconcile_with_history(
|
||||
|
|
@ -32,12 +29,13 @@ pub fn reconcile_with_history(
|
|||
tokenizer: BuiltinTokenizer,
|
||||
) -> TextWithCursorsAndHistory {
|
||||
set_panic_hook();
|
||||
|
||||
let reconciled = crate::reconcile(parent, left, right, &*tokenizer);
|
||||
let text_with_cursors = reconciled.apply();
|
||||
let (text_with_cursors, history) = reconciled.apply_with_all();
|
||||
|
||||
TextWithCursorsAndHistory {
|
||||
text_with_cursors,
|
||||
history: reconciled.apply_with_history(),
|
||||
history,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -47,17 +45,13 @@ pub fn reconcile_with_history(
|
|||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `parent`: The common parent document.
|
||||
/// - `left`: The left document updated by one user.
|
||||
/// - `right`: The right document updated by another user.
|
||||
/// - `parent`: The common parent document
|
||||
/// - `left`: The left document updated by one user
|
||||
/// - `right`: The right document updated by another user
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The merged document.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If any of the input documents are not valid UTF-8 strings.
|
||||
/// The merged document
|
||||
#[wasm_bindgen(js_name = genericReconcile)]
|
||||
#[must_use]
|
||||
pub fn generic_reconcile(
|
||||
|
|
@ -68,32 +62,64 @@ pub fn generic_reconcile(
|
|||
) -> Vec<u8> {
|
||||
set_panic_hook();
|
||||
|
||||
if crate::is_binary(parent) || crate::is_binary(left) || crate::is_binary(right) {
|
||||
right.to_vec()
|
||||
if let (Some(parent), Some(left), Some(right)) = (
|
||||
string_or_nothing(parent),
|
||||
string_or_nothing(left),
|
||||
string_or_nothing(right),
|
||||
) {
|
||||
crate::reconcile(&parent, &left.into(), &right.into(), &*tokenizer)
|
||||
.apply()
|
||||
.text()
|
||||
.into_bytes()
|
||||
} else {
|
||||
crate::reconcile(
|
||||
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
|
||||
&str::from_utf8(left)
|
||||
.expect("left must be valid UTF-8 because it's not binary")
|
||||
.into(),
|
||||
&str::from_utf8(right)
|
||||
.expect("right must be valid UTF-8 because it's not binary")
|
||||
.into(),
|
||||
&*tokenizer,
|
||||
)
|
||||
.apply()
|
||||
.text()
|
||||
.into_bytes()
|
||||
right.to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
/// Heuristically determine if the given data is a binary or a text file's
|
||||
/// content.
|
||||
#[wasm_bindgen(js_name = isBinary)]
|
||||
#[must_use]
|
||||
pub fn is_binary(data: &[u8]) -> bool {
|
||||
/// WASM wrapper around getting a compact diff representation of two texts as a
|
||||
/// list of numbers and strings
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a JS error if integer overflow occurs during diff computation.
|
||||
#[wasm_bindgen(js_name = diff)]
|
||||
pub fn diff(
|
||||
parent: &str,
|
||||
changed: &TextWithCursors,
|
||||
tokenizer: BuiltinTokenizer,
|
||||
) -> Result<Vec<JsValue>, JsValue> {
|
||||
set_panic_hook();
|
||||
crate::is_binary(data)
|
||||
|
||||
let edited_text = EditedText::from_strings_with_tokenizer(parent, changed, &*tokenizer);
|
||||
edited_text
|
||||
.to_diff()
|
||||
.map(|diff| diff.into_iter().map(std::convert::Into::into).collect())
|
||||
.map_err(|e| JsValue::from_str(&e.to_string()))
|
||||
}
|
||||
|
||||
/// Inverse of `diff`, applies a compact diff representation to a parent text
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a JS error if the diff format is invalid or references ranges
|
||||
/// exceeding the original text length.
|
||||
#[wasm_bindgen(js_name = undiff)]
|
||||
pub fn undiff(
|
||||
parent: &str,
|
||||
diff: Vec<JsValue>,
|
||||
tokenizer: BuiltinTokenizer,
|
||||
) -> Result<String, JsValue> {
|
||||
set_panic_hook();
|
||||
|
||||
let parsed_diff: Vec<_> = diff
|
||||
.into_iter()
|
||||
.map(std::convert::TryInto::try_into)
|
||||
.collect::<Result<_, _>>()
|
||||
.map_err(|e: crate::types::number_or_text::DeserialisationError| -> JsValue { e.into() })?;
|
||||
|
||||
EditedText::from_diff(parent, parsed_diff, &*tokenizer)
|
||||
.map(|edited_text| edited_text.apply().text())
|
||||
.map_err(|e| JsValue::from_str(&e.to_string()))
|
||||
}
|
||||
|
||||
fn set_panic_hook() {
|
||||
|
|
@ -113,11 +139,44 @@ pub struct TextWithCursorsAndHistory {
|
|||
#[wasm_bindgen]
|
||||
impl TextWithCursorsAndHistory {
|
||||
#[must_use]
|
||||
pub fn text(&self) -> String { self.text_with_cursors.text() }
|
||||
pub fn text(&self) -> String {
|
||||
self.text_with_cursors.text()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn cursors(&self) -> Vec<CursorPosition> { self.text_with_cursors.cursors() }
|
||||
pub fn cursors(&self) -> Vec<CursorPosition> {
|
||||
self.text_with_cursors.cursors()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn history(&self) -> Vec<SpanWithHistory> { self.history.clone() }
|
||||
pub fn history(&self) -> Vec<SpanWithHistory> {
|
||||
self.history.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the UTF8 parsed string if it's a text, or `None` if it's likely
|
||||
/// binary.
|
||||
#[must_use]
|
||||
fn string_or_nothing(data: &[u8]) -> Option<String> {
|
||||
if data.contains(&0) {
|
||||
// Even though the NUL character is valid in UTF-8, it's highly suspicious in
|
||||
// human-readable text.
|
||||
return None;
|
||||
}
|
||||
|
||||
std::str::from_utf8(data)
|
||||
.map(std::borrow::ToOwned::to_owned)
|
||||
.ok()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_string_or_nothing() {
|
||||
assert_eq!(string_or_nothing(&[0, 159, 146, 150]), None);
|
||||
assert_eq!(string_or_nothing(&[0, 12]), None);
|
||||
assert_eq!(string_or_nothing(b"hello"), Some("hello".into()));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,9 @@ pub struct ExampleDocument {
|
|||
|
||||
impl ExampleDocument {
|
||||
#[must_use]
|
||||
pub fn parent(&self) -> String { self.parent.clone() }
|
||||
pub fn parent(&self) -> String {
|
||||
self.parent.clone()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn left(&self) -> TextWithCursors {
|
||||
|
|
@ -65,9 +67,9 @@ impl ExampleDocument {
|
|||
let mut result = merged.text();
|
||||
for (i, cursor) in merged.cursors().iter().enumerate() {
|
||||
assert!(
|
||||
cursor.char_index <= result.len(), // equals in case of insert at the end
|
||||
cursor.char_index() <= result.len(), // equals in case of insert at the end
|
||||
"Cursor index out of bounds: {} > {} when testing for '{}.'",
|
||||
cursor.char_index,
|
||||
cursor.char_index(),
|
||||
result.len(),
|
||||
result
|
||||
);
|
||||
|
|
@ -75,7 +77,7 @@ impl ExampleDocument {
|
|||
result.insert(
|
||||
result
|
||||
.char_indices()
|
||||
.nth(cursor.char_index + i)
|
||||
.nth(cursor.char_index() + i)
|
||||
.map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert
|
||||
* in byte index */
|
||||
'|',
|
||||
|
|
@ -94,10 +96,7 @@ impl ExampleDocument {
|
|||
let mut cursors = Vec::new();
|
||||
for (i, c) in text.chars().enumerate() {
|
||||
if c == '|' {
|
||||
cursors.push(CursorPosition {
|
||||
id: 0,
|
||||
char_index: i - cursors.len(),
|
||||
});
|
||||
cursors.push(CursorPosition::new(0, i - cursors.len()));
|
||||
}
|
||||
}
|
||||
cursors
|
||||
|
|
|
|||
|
|
@ -34,6 +34,44 @@ fn test_document_one_way_with_cursors() {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
#[test]
|
||||
fn test_document_one_way_with_serialisation() {
|
||||
use reconcile_text::EditedText;
|
||||
|
||||
for doc in &get_all_documents() {
|
||||
let parent = doc.parent();
|
||||
let left_operations =
|
||||
EditedText::from_strings_with_tokenizer(&parent, &doc.left(), &*BuiltinTokenizer::Word);
|
||||
let right_operations = EditedText::from_strings_with_tokenizer(
|
||||
&parent,
|
||||
&doc.right(),
|
||||
&*BuiltinTokenizer::Word,
|
||||
);
|
||||
|
||||
let serialised_left = serde_yaml::from_str(
|
||||
&serde_yaml::to_string(&left_operations.to_diff().unwrap()).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
let serialised_right = serde_yaml::from_str(
|
||||
&serde_yaml::to_string(&right_operations.to_diff().unwrap()).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let restored_left_operations =
|
||||
EditedText::from_diff(&parent, serialised_left, &*BuiltinTokenizer::Word).unwrap();
|
||||
let restored_right_operations =
|
||||
EditedText::from_diff(&parent, serialised_right, &*BuiltinTokenizer::Word).unwrap();
|
||||
|
||||
doc.assert_eq_without_cursors(
|
||||
&restored_left_operations
|
||||
.merge(restored_right_operations)
|
||||
.apply()
|
||||
.text(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_document_inverse_way_without_cursors() {
|
||||
for doc in &get_all_documents() {
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ fn test_merge_text_with_cursors() {
|
|||
}
|
||||
|
||||
#[wasm_bindgen_test(unsupported = test)]
|
||||
fn merge_binary() {
|
||||
fn test_merge_binary() {
|
||||
let left = [0, 1, 2];
|
||||
let right = [3, 4, 5];
|
||||
assert_eq!(
|
||||
|
|
@ -55,14 +55,16 @@ fn merge_binary() {
|
|||
);
|
||||
}
|
||||
|
||||
#[wasm_bindgen_test(unsupported = test)]
|
||||
fn test_is_binary() {
|
||||
assert!(is_binary(&[0, 159, 146, 150]));
|
||||
assert!(is_binary(&[0, 12]));
|
||||
assert!(!is_binary(b"hello"));
|
||||
}
|
||||
#[wasm_bindgen_test] // JsValue isn't supported outside of wasm
|
||||
fn test_diff() {
|
||||
let parent = "hello ";
|
||||
let changed = "world";
|
||||
|
||||
#[wasm_bindgen_test(unsupported = test)]
|
||||
fn test_is_binary_empty() {
|
||||
assert!(!is_binary(b""));
|
||||
let result = diff(parent, &changed.into(), BuiltinTokenizer::Word).unwrap();
|
||||
|
||||
assert_eq!(result.len(), 2);
|
||||
let first: i64 = result[0].clone().try_into().unwrap();
|
||||
let second: String = result[1].clone().try_into().unwrap();
|
||||
assert_eq!(first, -6);
|
||||
assert_eq!(second, "world");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue