diff --git a/.forgejo/workflows/check.yml b/.forgejo/workflows/check.yml new file mode 100644 index 0000000..6e13d91 --- /dev/null +++ b/.forgejo/workflows/check.yml @@ -0,0 +1,74 @@ +name: Check + +on: + push: + branches: ['main'] + pull_request: + branches: ['main'] + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: '-Dwarnings' + +jobs: + build: + runs-on: docker + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '22.x' + check-latest: true + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Cache npm dependencies + uses: actions/cache@v4 + with: + path: | + reconcile-js/node_modules + examples/website/node_modules + ~/.npm + key: >- + ${{ runner.os }}-npm-${{ + hashFiles( + 'reconcile-js/package-lock.json', + 'examples/website/package-lock.json' + ) + }} + restore-keys: | + ${{ runner.os }}-npm- + + - name: Install Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain none --profile minimal + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + + - name: Install uv + run: | + curl --proto '=https' --tlsv1.2 -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: Lint + run: scripts/lint.sh + + - name: Test + run: scripts/test.sh + + - name: Build website + run: scripts/build-website.sh diff --git a/.forgejo/workflows/publish.yml b/.forgejo/workflows/publish.yml new file mode 100644 index 0000000..bbaf253 --- /dev/null +++ b/.forgejo/workflows/publish.yml @@ -0,0 +1,269 @@ +name: Publish + +on: + push: + branches: ['main'] + tags: ['*'] + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: '-Dwarnings' + +concurrency: + group: 'pages' + 
cancel-in-progress: false + +jobs: + build: + runs-on: docker + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '22.x' + check-latest: true + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Cache npm dependencies + uses: actions/cache@v4 + with: + path: | + reconcile-js/node_modules + examples/website/node_modules + ~/.npm + key: >- + ${{ runner.os }}-npm-${{ + hashFiles( + 'reconcile-js/package-lock.json', + 'examples/website/package-lock.json' + ) + }} + restore-keys: | + ${{ runner.os }}-npm- + + - name: Install Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain none --profile minimal + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + + - name: Install uv + run: | + curl --proto '=https' --tlsv1.2 -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: Lint + run: scripts/lint.sh + + - name: Test + run: scripts/test.sh + + - name: Build website + run: scripts/build-website.sh + + - name: Deploy to pages mount + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: | + apt-get update && apt-get install -y rsync + rsync -a --delete examples/website/dist/ /pages/reconcile + + publish-crate: + needs: build + runs-on: docker + if: startsWith(github.ref, 'refs/tags/') + + steps: + - uses: actions/checkout@v4 + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Install Rust toolchain + run: | 
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain none --profile minimal + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + + - name: Publish to crates.io + # cargo reads CARGO_REGISTRY_TOKEN from the environment; passing the secret + # via --token would put it on the command line (visible in process listings). + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} + run: cargo publish + + publish-npm: + needs: build + runs-on: docker + if: startsWith(github.ref, 'refs/tags/') + + steps: + - uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '22.x' + check-latest: true + registry-url: 'https://registry.npmjs.org' + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Cache npm dependencies + uses: actions/cache@v4 + with: + path: | + reconcile-js/node_modules + ~/.npm + key: >- + ${{ runner.os }}-npm-${{ + hashFiles('reconcile-js/package-lock.json') + }} + restore-keys: | + ${{ runner.os }}-npm- + + - name: Install Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain none --profile minimal + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + + - name: Build website + run: scripts/build-website.sh + + - name: Publish reconcile-js to NPM + run: | + cd reconcile-js + cp ../README.md . 
+ npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + publish-pypi: + needs: build + runs-on: docker + if: startsWith(github.ref, 'refs/tags/') + + steps: + - uses: actions/checkout@v4 + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-pypi-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-pypi- + ${{ runner.os }}-cargo- + + # clang/lld/llvm provide clang-cl, lld-link and llvm-lib, which cargo-xwin + # uses to cross-compile the Windows wheel from this Linux runner. + - name: Install cross-compilation system dependencies + run: | + apt-get update + apt-get install -y clang lld llvm + + - name: Install Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ + | sh -s -- -y --default-toolchain none --profile minimal + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + + # The Linux targets ship in rust-toolchain.toml; add the cross targets. + - name: Add cross-compilation Rust targets + run: | + rustup target add aarch64-unknown-linux-gnu x86_64-pc-windows-msvc + + # zig is the C toolchain maturin's `--zig` uses to produce manylinux2014 + # wheels with a pinned (old) glibc, independent of the runner's glibc. 
+ - name: Install zig + run: | + ZIG_VERSION=0.13.0 + curl --proto '=https' --tlsv1.2 -fLsS \ + "https://ziglang.org/download/${ZIG_VERSION}/zig-linux-x86_64-${ZIG_VERSION}.tar.xz" \ + | tar -xJ + echo "$PWD/zig-linux-x86_64-${ZIG_VERSION}" >> "$GITHUB_PATH" + + - name: Install cargo-xwin + run: command -v cargo-xwin || cargo install --locked cargo-xwin + + - name: Install uv + run: | + curl --proto '=https' --tlsv1.2 -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + + - name: Copy README + run: cp README.md reconcile-python/ + + - name: Build sdist + working-directory: reconcile-python + run: uv run maturin sdist --out dist + + - name: Build Linux x86_64 wheel + working-directory: reconcile-python + run: >- + uv run maturin build --release --out dist + --compatibility manylinux2014 + --target x86_64-unknown-linux-gnu --zig + + - name: Build Linux aarch64 wheel + working-directory: reconcile-python + run: >- + uv run maturin build --release --out dist + --compatibility manylinux2014 + --target aarch64-unknown-linux-gnu --zig + + - name: Build Windows x86_64 wheel + working-directory: reconcile-python + run: >- + uv run maturin build --release --out dist + --target x86_64-pc-windows-msvc + + # Forgejo cannot use PyPI trusted publishing (OIDC), so authenticate with + # an API token. --skip-existing makes re-runs of a tag idempotent. + - name: Publish to PyPI + working-directory: reconcile-python + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: uv run maturin upload --skip-existing dist/* diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index f5af792..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,26 +0,0 @@ -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. 
-# Please see the documentation for all configuration options: -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file - -version: 2 -updates: - - package-ecosystem: 'cargo' - directories: ['**'] - schedule: - interval: 'daily' - - - package-ecosystem: 'github-actions' - directories: ['**'] - schedule: - interval: 'daily' - - - package-ecosystem: 'npm' - directories: ['/reconcile-js', '/examples/website'] - schedule: - interval: 'daily' - - - package-ecosystem: 'pip' - directories: ['/reconcile-python'] - schedule: - interval: 'daily' diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml deleted file mode 100644 index 261031d..0000000 --- a/.github/workflows/check.yml +++ /dev/null @@ -1,198 +0,0 @@ -name: Check & publish - -on: - push: - branches: ['main'] - tags: ['*'] - pull_request: - branches: ['main'] - -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: '-Dwarnings' - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v6 - - - name: Setup Node.js environment - uses: actions/setup-node@v6.3.0 - with: - node-version: '22.x' - check-latest: true - - - name: Install uv - uses: astral-sh/setup-uv@v7 - - - name: Cache Rust dependencies - uses: actions/cache@v5 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: Lint - run: scripts/lint.sh - - - name: Test - run: scripts/test.sh - - publish-crate: - needs: build - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - - steps: - - uses: actions/checkout@v6 - - - name: Cache Rust dependencies - uses: actions/cache@v5 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | 
- ${{ runner.os }}-cargo- - - - name: Publish to crates.io - run: cargo publish --token ${{ secrets.CRATES_IO_TOKEN }} - - publish-npm: - needs: build - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/') - permissions: - contents: read - id-token: write - - steps: - - uses: actions/checkout@v6 - - - name: Setup Node.js environment - uses: actions/setup-node@v6.3.0 - with: - node-version: '22.x' - check-latest: true - registry-url: 'https://registry.npmjs.org' - - - name: Cache Rust dependencies - uses: actions/cache@v5 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: Cache npm dependencies - uses: actions/cache@v5 - with: - path: | - reconcile-js/node_modules - ~/.npm - key: ${{ runner.os }}-npm-${{ hashFiles('reconcile-js/package-lock.json') }} - restore-keys: | - ${{ runner.os }}-npm- - - - name: Build website - run: scripts/build-website.sh - - - name: Publish reconcile-js to NPM - run: | - cd reconcile-js - cp ../README.md . 
- npm publish --provenance --access public - - build-python-wheels: - needs: build - if: startsWith(github.ref, 'refs/tags/') - strategy: - matrix: - include: - - os: ubuntu-latest - target: x86_64 - - os: ubuntu-latest - target: aarch64 - - os: macos-latest - target: x86_64 - - os: macos-latest - target: aarch64 - - os: windows-latest - target: x86_64 - runs-on: ${{ matrix.os }} - - steps: - - uses: actions/checkout@v6 - - - uses: actions/setup-python@v6 - with: - python-version: '3.x' - - - name: Copy README - run: cp README.md reconcile-python/ - - - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.target }} - args: --release --out dist --find-interpreter - manylinux: auto - working-directory: reconcile-python - - - uses: actions/upload-artifact@v7 - with: - name: wheels-${{ matrix.os }}-${{ matrix.target }} - path: reconcile-python/dist/*.whl - - build-python-sdist: - needs: build - if: startsWith(github.ref, 'refs/tags/') - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v6 - - - name: Copy README - run: cp README.md reconcile-python/ - - - uses: PyO3/maturin-action@v1 - with: - command: sdist - args: --out dist - working-directory: reconcile-python - - - uses: actions/upload-artifact@v7 - with: - name: sdist - path: reconcile-python/dist/*.tar.gz - - publish-pypi: - needs: [build-python-wheels, build-python-sdist] - runs-on: ubuntu-latest - permissions: - id-token: write - - steps: - - uses: actions/download-artifact@v8 - with: - pattern: '{wheels-*,sdist}' - merge-multiple: true - path: dist - - - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml deleted file mode 100644 index 7ac04ea..0000000 --- a/.github/workflows/gh-pages.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Deploy Website to GitHub Pages - -on: - push: - branches: - - main - workflow_dispatch: - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - 
pages: write - id-token: write - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. -concurrency: - group: 'pages' - cancel-in-progress: false - -jobs: - build: - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Cache Rust dependencies - uses: actions/cache@v5 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: Cache npm dependencies - uses: actions/cache@v5 - with: - path: | - reconcile-js/node_modules - ~/.npm - key: ${{ runner.os }}-npm-${{ hashFiles('reconcile-js/package-lock.json') }} - restore-keys: | - ${{ runner.os }}-npm- - - - name: Build wasm - run: | - which wasm-pack || cargo install wasm-pack - scripts/build-website.sh - - - name: Upload artifact - uses: actions/upload-pages-artifact@v4 - with: - path: examples/website/dist - - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - needs: build - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index 0957a69..c58feaf 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,8 @@ node_modules # WebPack build output dist +# Generated wasm-bindgen bundler + wasm2js output for the React Native build +pkg-rn + # Python virtual environment .venv diff --git a/.vscode/settings.json b/.vscode/settings.json index db11dce..4571f3c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,8 @@ }, "rust-analyzer.cargo.features": [ "all" + ], + "python.analysis.extraPaths": [ + "./reconcile-python/python" ] } \ No newline at end of file 
diff --git a/Cargo.lock b/Cargo.lock index 80b1085..b371452 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -428,7 +428,7 @@ checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "reconcile-text" -version = "0.9.4" +version = "0.12.0" dependencies = [ "console_error_panic_hook", "diff-match-patch-rs", diff --git a/Cargo.toml b/Cargo.toml index b578b91..5e60f78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "reconcile-text" description = "Intelligent 3-way text merging with automated conflict resolution" -version = "0.9.4" +version = "0.12.0" rust-version = "1.94" authors = ["Andras Schmelczer "] edition = "2024" diff --git a/README.md b/README.md index c25ef92..b644077 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,13 @@ console.log(result.text); // "Hi beautiful world" See the [example website source](examples/website/src/index.ts) for a more complex example, or the [advanced examples document](docs/advanced-ts.md). +#### React Native (Hermes) + +React Native's default engine, Hermes, does not expose a runtime `WebAssembly` +global, so the WebAssembly build cannot run there. For React Native, the package +ships a pure-JavaScript build produced by [Binaryen's `wasm2js`](https://github.com/WebAssembly/binaryen) +via its `react-native` entry point. + ### Python Install via uv or pip: diff --git a/docs/advanced-ts.md b/docs/advanced-ts.md index 7e53bf5..dd1633d 100644 --- a/docs/advanced-ts.md +++ b/docs/advanced-ts.md @@ -2,40 +2,65 @@ ## Edit Provenance -Track which changes came from where using `reconcileWithHistory`: +Track which changes came from where using `reconcileWithHistory`. The result's +`history` field is typed as `SpanWithHistory[]`, and each span's `history` is a +`History` string-literal union. 
-```javascript -const result = reconcileWithHistory( - 'Hello world', - 'Hello beautiful world', - 'Hi world' -); +```typescript +import { reconcileWithHistory, type History, type SpanWithHistory } from 'reconcile-text'; -console.log(result.text); // "Hi beautiful world" -console.log(result.history); /* -[ - { - "text": "Hello", - "history": "RemovedFromRight" - }, - { - "text": "Hi", - "history": "AddedFromRight" - }, - { - "text": " beautiful", - "history": "AddedFromLeft" - }, - { - "text": " ", - "history": "Unchanged" - }, - { - "text": "world", - "history": "Unchanged" +const result = reconcileWithHistory('Hello world', 'Hello beautiful world', 'Hi world'); + +console.log(result.text); // "Hi beautiful world" + +const history: SpanWithHistory[] = result.history; +console.log(history); +// [ +// { text: "Hello", history: "RemovedFromRight" }, +// { text: "Hi", history: "AddedFromRight" }, +// { text: " beautiful", history: "AddedFromLeft" }, +// { text: " ", history: "Unchanged" }, +// { text: "world", history: "Unchanged" }, +// ] + +const classByHistory = { + Unchanged: 'merge-unchanged', + AddedFromLeft: 'merge-added-left', + AddedFromRight: 'merge-added-right', + RemovedFromLeft: 'merge-removed-left', + RemovedFromRight: 'merge-removed-right', +} satisfies Record<History, string>; +``` + +Using `satisfies Record<History, string>` keeps the object literal's values +narrow while forcing every history case to be handled. If a future version adds +another `History` value, TypeScript will point at this mapping. 
+ +For control flow, use the same union as an exhaustiveness check: + +```typescript +import type { History } from 'reconcile-text'; + +function historyLabel(history: History): string { + switch (history) { + case 'Unchanged': + return 'unchanged'; + case 'AddedFromLeft': + return 'added by left'; + case 'AddedFromRight': + return 'added by right'; + case 'RemovedFromLeft': + return 'removed from left'; + case 'RemovedFromRight': + return 'removed from right'; + default: + return assertNever(history); } -] -*/ +} + +function assertNever(value: never): never { + throw new Error(`Unhandled history value: ${value}`); +} ``` ## Tokenisation Strategies @@ -45,26 +70,162 @@ console.log(result.history); /* - **Word tokeniser** (`"Word"`) - Splits on word boundaries (recommended for prose) - **Character tokeniser** (`"Character"`) - Individual characters (fine-grained control) - **Line tokeniser** (`"Line"`) - Line-by-line (similar to `git merge` or more precisely [`git merge-file`](https://git-scm.com/docs/git-merge-file)) +- **Markdown tokeniser** (`"Markdown"`) - Splits on Markdown structural boundaries (headings, list items, paragraphs) + +```typescript +import { reconcile, type BuiltinTokenizer } from 'reconcile-text'; + +const tokenizers = [ + 'Word', + 'Character', + 'Line', + 'Markdown', +] as const satisfies readonly BuiltinTokenizer[]; + +const result = reconcile('abc', 'axc', 'abyc', 'Character'); +console.log(result.text); // "axyc" + +for (const tokenizer of tokenizers) { + const merged = reconcile( + '# Title\n\n- old item\n', + '# Title\n\n- old item\n- left item\n', + '# New title\n\n- old item\n', + tokenizer + ); + + console.log(tokenizer, merged.text); +} +``` ## Cursor Tracking -`reconcile-text` automatically tracks cursor positions through merges, which is useful for collaborative editors. Selections can be tracked by providing them as a pair of cursors. 
+`reconcile-text` automatically tracks cursor positions through merges, which is +useful for collaborative editors. Selections can be tracked by providing them as +a pair of cursors. -```javascript -const result = reconcile( - 'Hello world', - { - text: 'Hello beautiful world', - cursors: [{ id: 1, position: 6 }], // After "Hello " - }, - { - text: 'Hi world', - cursors: [{ id: 2, position: 0 }], // At the beginning - } -); +```typescript +import { reconcile, type TextWithOptionalCursors } from 'reconcile-text'; + +const left = { + text: 'Hello beautiful world', + cursors: [{ id: 1, position: 6 }], // After "Hello " +} satisfies TextWithOptionalCursors; + +const right = { + text: 'Hi world', + cursors: [{ id: 2, position: 0 }], // At the beginning +} satisfies TextWithOptionalCursors; + +const result = reconcile('Hello world', left, right); // Result: "Hi beautiful world" with repositioned cursors -console.log(result.text); // "Hi beautiful world" +console.log(result.text); // "Hi beautiful world" console.log(result.cursors); // [{ id: 2, position: 0 }, { id: 1, position: 3 }] ``` + > The `cursors` list is sorted by character position (not IDs). + +## Generic Helpers and Inference + +The exported merge functions are intentionally small: they merge strings, or +strings plus cursor metadata. In TypeScript applications, keep domain-specific +metadata in your own typed wrappers and let inference preserve the surrounding +shape. 
+```typescript +import { reconcile, type BuiltinTokenizer } from 'reconcile-text'; + +type ReconciledText<TDraft extends { text: string }> = Omit<TDraft, 'text'> & { + text: string; +}; + +function reconcileDraft<TDraft extends { text: string }>( + parent: TDraft, + left: TDraft, + right: TDraft, + tokenizer?: BuiltinTokenizer +): ReconciledText<TDraft> { + return { + ...right, + text: reconcile(parent.text, left.text, right.text, tokenizer).text, + }; +} + +interface MarkdownDraft { + id: string; + text: string; + updatedAt: Date; +} + +const parent: MarkdownDraft = { + id: 'intro', + text: '# Title\n\nOld text\n', + updatedAt: new Date('2026-01-01T00:00:00Z'), +}; + +const left: MarkdownDraft = { + ...parent, + text: '# Title\n\nOld text\n\n- left note\n', +}; + +const right: MarkdownDraft = { + ...parent, + text: '# New title\n\nOld text\n', +}; + +const merged = reconcileDraft(parent, left, right, 'Markdown'); +// merged is inferred as { id: string; updatedAt: Date; text: string } +``` + +Use `satisfies` for configuration objects and cursor payloads when you want +compile-time checking without widening everything to the library interface. + +```typescript +import type { BuiltinTokenizer, TextWithOptionalCursors } from 'reconcile-text'; + +const mergeOptions = { + tokenizer: 'Markdown', + renderDeletedSpans: true, +} satisfies { + tokenizer: BuiltinTokenizer; + renderDeletedSpans: boolean; +}; + +const documentWithSelection = { + text: 'Hello beautiful world', + cursors: [ + { id: 1, position: 6 }, + { id: 2, position: 15 }, + ], +} satisfies TextWithOptionalCursors; +``` + +## Compact Diffs + +Generate and apply compact diff representations. The TypeScript type is +`Array` for `diff()` and `Array` for +`undiff()`, because the underlying WebAssembly layer may represent integer +entries as `bigint`. 
+ +```typescript +import { diff, undiff } from 'reconcile-text'; + +const original = 'Hello world'; +const changed = 'Hello beautiful world'; + +// Generate a compact diff +const changes = diff(original, changed); +console.log(changes); // [5, " beautiful world"] + +// Reconstruct the changed text from the diff +const reconstructed = undiff(original, changes); +console.assert(reconstructed === changed); +``` + +Diff entries are positive integers (retain N characters), negative integers +(delete N characters), and strings (insert text). + +## Complete Example + +For a complete browser example that renders `SpanWithHistory` values and cursor +selections, see the [example website source](../examples/website/src/index.ts). diff --git a/examples/website/package-lock.json b/examples/website/package-lock.json index 39397c3..a9369d7 100644 --- a/examples/website/package-lock.json +++ b/examples/website/package-lock.json @@ -28,11 +28,12 @@ }, "../../reconcile-js": { "name": "reconcile-text", - "version": "0.9.4", + "version": "0.12.0", "dev": true, "license": "MIT", "devDependencies": { "@types/jest": "^30.0.0", + "binaryen": "^123.0.0", "jest": "^30.3.0", "prettier": "^3.8.1", "reconcile-text": "file:../pkg", @@ -1135,6 +1136,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1175,6 +1177,7 @@ "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -1411,6 +1414,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3976,6 +3980,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -4484,6 +4489,7 @@ "integrity": 
"sha512-+4N/u9dZ4PrgzGgPlKnaaRQx64RO0JBKs9sDhQ2pLgN6JQZ25uPQZKQYaBJU48Kd5BxgXoJ4e09Dq7nMcOUW3A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "chokidar": "^4.0.0", "immutable": "^5.1.5", @@ -5303,6 +5309,7 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5386,7 +5393,8 @@ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "dev": true, - "license": "0BSD" + "license": "0BSD", + "peer": true }, "node_modules/tsyringe": { "version": "4.10.0", @@ -5428,6 +5436,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5568,6 +5577,7 @@ "integrity": "sha512-jTywjboN9aHxFlToqb0K0Zs9SbBoW4zRUlGzI2tYNxVYcEi/IPpn+Xi4ye5jTLvX2YeLuic/IvxNot+Q1jMoOw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -5617,6 +5627,7 @@ "integrity": "sha512-MfwFQ6SfwinsUVi0rNJm7rHZ31GyTcpVE5pgVA3hwFRb7COD4TzjUUwhGWKfO50+xdc2MQPuEBBJoqIMGt3JDw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.6.1", "@webpack-cli/configtest": "^3.0.1", diff --git a/reconcile-js/package-lock.json b/reconcile-js/package-lock.json index 5fe2e42..b1a50b6 100644 --- a/reconcile-js/package-lock.json +++ b/reconcile-js/package-lock.json @@ -1,15 +1,16 @@ { "name": "reconcile-text", - "version": "0.9.4", + "version": "0.12.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "reconcile-text", - "version": "0.9.4", + "version": "0.12.0", "license": "MIT", "devDependencies": { "@types/jest": "^30.0.0", + "binaryen": 
"^123.0.0", "jest": "^30.3.0", "prettier": "^3.8.1", "reconcile-text": "file:../pkg", @@ -24,7 +25,7 @@ }, "../pkg": { "name": "reconcile-text", - "version": "0.9.4", + "version": "0.12.0", "dev": true, "license": "MIT" }, @@ -65,6 +66,7 @@ "version": "7.28.0", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.27.1", @@ -1656,6 +1658,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -1682,6 +1685,7 @@ "integrity": "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -1908,6 +1912,24 @@ "node": ">=6.0.0" } }, + "node_modules/binaryen": { + "version": "123.0.0", + "resolved": "https://registry.npmjs.org/binaryen/-/binaryen-123.0.0.tgz", + "integrity": "sha512-/hls/a309aZCc0itqP6uhoR+5DsKSlJVfB8Opd2BY9Ndghs84IScTunlyidyF4r2Xe3lQttnfBNIDjaNpj6mTw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "wasm-as": "bin/wasm-as", + "wasm-ctor-eval": "bin/wasm-ctor-eval", + "wasm-dis": "bin/wasm-dis", + "wasm-merge": "bin/wasm-merge", + "wasm-metadce": "bin/wasm-metadce", + "wasm-opt": "bin/wasm-opt", + "wasm-reduce": "bin/wasm-reduce", + "wasm-shell": "bin/wasm-shell", + "wasm2js": "bin/wasm2js" + } + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -1950,6 +1972,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3053,6 +3076,7 @@ "integrity": "sha512-AkXIIFcaazymvey2i/+F94XRnM6TsVLZDhBMLsd1Sf/W0wzsvvpjeyUrCZD6HGG4SDYPgDJDBKeiJTBb10WzMg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { 
"@jest/core": "30.3.0", "@jest/types": "30.3.0", @@ -4936,6 +4960,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5072,6 +5097,7 @@ "integrity": "sha512-jTywjboN9aHxFlToqb0K0Zs9SbBoW4zRUlGzI2tYNxVYcEi/IPpn+Xi4ye5jTLvX2YeLuic/IvxNot+Q1jMoOw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -5119,6 +5145,7 @@ "version": "6.0.1", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.6.1", "@webpack-cli/configtest": "^3.0.1", diff --git a/reconcile-js/package.json b/reconcile-js/package.json index 3b28195..da3a0ce 100644 --- a/reconcile-js/package.json +++ b/reconcile-js/package.json @@ -1,9 +1,10 @@ { "name": "reconcile-text", - "version": "0.9.4", + "version": "0.12.0", "description": "Intelligent 3-way text merging with automated conflict resolution", "main": "dist/reconcile.node.js", "browser": "dist/reconcile.web.js", + "react-native": "dist/reconcile.rn.js", "keywords": [ "text editing", "sync", @@ -18,7 +19,7 @@ "homepage": "https://schmelczer.dev/reconcile/", "repository": { "type": "git", - "url": "https://github.com/schmelczer/reconcile.git" + "url": "git+https://github.com/schmelczer/reconcile.git" }, "bugs": { "url": "https://github.com/schmelczer/reconcile/issues", @@ -31,12 +32,13 @@ "dist/**/*" ], "scripts": { - "build": "webpack --mode production", - "format": "prettier --write \"./**/*.(ts|scss|json|html)\"", + "build": "node scripts/build-rn.mjs && webpack --mode production", + "format": "prettier --write \"./**/*.(ts|mjs|scss|json|html)\"", "test": "NODE_OPTIONS=\"$NODE_OPTIONS --experimental-vm-modules\" jest" }, "devDependencies": { "@types/jest": "^30.0.0", + "binaryen": "^123.0.0", "jest": "^30.3.0", "prettier": "^3.8.1", "reconcile-text": 
"file:../pkg", diff --git a/reconcile-js/scripts/build-rn.mjs b/reconcile-js/scripts/build-rn.mjs new file mode 100644 index 0000000..dadbc82 --- /dev/null +++ b/reconcile-js/scripts/build-rn.mjs @@ -0,0 +1,307 @@ +// Generates `pkg-rn/`: a React Native / Hermes-compatible build of the +// wasm-bindgen bindings in which the WebAssembly module is replaced by its +// wasm2js (pure-JS) translation. + +import { execFileSync } from 'node:child_process'; +import { existsSync, readdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { homedir } from 'node:os'; + +const here = dirname(fileURLToPath(import.meta.url)); +const reconcileJsDir = resolve(here, '..'); +const repoRoot = resolve(reconcileJsDir, '..'); + +const releaseWasm = resolve( + repoRoot, + 'target/wasm32-unknown-unknown/release/reconcile_text.wasm' +); +const outDir = resolve(reconcileJsDir, 'pkg-rn'); +const bgWasm = resolve(outDir, 'reconcile_text_bg.wasm'); +const bgWasmJs = resolve(outDir, 'reconcile_text_bg.wasm.js'); +const loweredWasm = resolve(outDir, '_lowered.wasm'); +const entryJs = resolve(outDir, 'reconcile_text.js'); + +const wasmOpt = resolve(reconcileJsDir, 'node_modules/.bin/wasm-opt'); +const wasm2js = resolve(reconcileJsDir, 'node_modules/.bin/wasm2js'); + +function run(cmd, args) { + execFileSync(cmd, args, { stdio: 'inherit' }); +} + +// Locate the wasm-bindgen CLI. It MUST match the `wasm-bindgen` crate version pinned +// in Cargo.toml: a mismatched CLI emits bindings the runtime can't use. So we resolve +// the required version first and verify every candidate against it, failing loudly +// rather than silently falling back to whatever other version happens to be around. 
/**
 * Ask a wasm-bindgen binary for its version.
 *
 * @param {string} binary Path of the executable to run with `--version`.
 * @returns {string | undefined} The first `x.y.z` found in the output, or `undefined`
 *   when the binary cannot be run or prints nothing that looks like a version.
 */
function readWasmBindgenVersion(binary) {
  try {
    return execFileSync(binary, ['--version'], { encoding: 'utf8' }).match(
      /\d+\.\d+\.\d+/
    )?.[0];
  } catch {
    // Not an invokable wasm-bindgen; callers treat this as "unknown version".
    return undefined;
  }
}

/**
 * Locate a wasm-bindgen CLI whose version equals the one declared in Cargo.toml.
 *
 * Lookup order:
 *   1. `wasm-bindgen` on PATH — accepted only if its version matches; a mismatch (or a
 *      binary whose version cannot be read) is a hard error, never a silent fallback.
 *   2. The wasm-pack cache directories under the user's home directory.
 *
 * @returns {string} Path to a wasm-bindgen executable of the required version.
 * @throws {Error} If the version cannot be parsed from Cargo.toml, if the PATH binary
 *   does not match it, or if no matching binary exists anywhere.
 */
function findWasmBindgen() {
  const cargoToml = readFileSync(resolve(repoRoot, 'Cargo.toml'), 'utf8');
  const wanted = cargoToml.match(
    /wasm-bindgen\s*=\s*\{[^}]*version\s*=\s*"([^"]+)"/
  )?.[1];
  if (!wanted) {
    throw new Error(
      '[build-rn] Could not parse the pinned wasm-bindgen version from Cargo.toml, so ' +
        'the required CLI version is unknown. Has the dependency declaration changed?'
    );
  }

  // 1. On PATH: accept it only if its version matches the pin.
  let onPath = null;
  try {
    onPath = execFileSync('which', ['wasm-bindgen'], { encoding: 'utf8' }).trim();
  } catch {
    /* not on PATH; try the wasm-pack cache next */
  }
  if (onPath) {
    // A binary that cannot report its version yields `undefined`, which falls into the
    // mismatch branch below and is reported as "an unknown version".
    const version = readWasmBindgenVersion(onPath);
    if (version !== wanted) {
      throw new Error(
        `[build-rn] wasm-bindgen on PATH (${onPath}) is ${version ?? 'an unknown version'}, ` +
          `but Cargo.toml pins ${wanted}. Install the matching CLI ` +
          `(\`cargo install wasm-bindgen-cli --version ${wanted}\`) or remove the mismatched one.`
      );
    }
    return onPath;
  }

  // 2. wasm-pack cache: take the first cached binary that reports the wanted version.
  const cacheRoots = [
    resolve(homedir(), 'Library/Caches/.wasm-pack'),
    resolve(homedir(), '.cache/.wasm-pack'),
  ];
  for (const root of cacheRoots) {
    if (!existsSync(root)) {
      continue;
    }
    for (const entry of readdirSync(root)) {
      const candidate = resolve(root, entry, 'wasm-bindgen');
      if (existsSync(candidate) && readWasmBindgenVersion(candidate) === wanted) {
        return candidate;
      }
    }
  }

  throw new Error(
    `[build-rn] No wasm-bindgen ${wanted} found on PATH or in the wasm-pack cache. ` +
      'Run `wasm-pack build --target web --features wasm` first (it caches the matching ' +
      `wasm-bindgen), or \`cargo install wasm-bindgen-cli --version ${wanted}\`.`
  );
}

if (!existsSync(releaseWasm)) {
  throw new Error(
    `Missing ${releaseWasm}.\nRun \`wasm-pack build --target web --features wasm\` from the repo root first.`
  );
}

console.log('[build-rn] generating bundler-target bindings with wasm-bindgen');
rmSync(outDir, { recursive: true, force: true });
const wasmBindgen = findWasmBindgen();
run(wasmBindgen, ['--target', 'bundler', '--out-dir', outDir, releaseWasm]);

// --- Patch wasm-bindgen's cached-memory getters for wasm2js -----------------
//
// wasm-bindgen caches typed-array / DataView views over `wasm.memory.buffer` and
// only re-creates them when it detects the heap grew. It detects a grow by looking
// for ArrayBuffer *detachment*: a real `WebAssembly.Memory.grow()` detaches the old
// buffer (its `byteLength` becomes 0 and `.detached` becomes true), and those are the
// only signals the generated getters check:
//   - getUint8ArrayMemory0(): refreshes when `byteLength === 0` (detach only)
//   - getDataViewMemory0(): refreshes when `.detached === true`, OR when the buffer
//     identity changed but only `if (.detached === undefined)` — i.e. that identity
//     fallback runs solely on engines lacking `ArrayBuffer.prototype.detached`.
//
// wasm2js grows differently: `__wasm_memory_grow` (in reconcile_text_bg.wasm.js)
// allocates a NEW ArrayBuffer, copies the old heap into it, and reassigns
// `memory.buffer` WITHOUT ever detaching the old buffer. So the old buffer keeps
// `byteLength > 0` and `.detached === false`, and on modern engines that DO expose
// `ArrayBuffer.prototype.detached` (Node 25+, current Hermes) the identity fallback is
// gated off. Net effect: after a grow the getters keep returning views over the stale
// pre-grow buffer, silently corrupting any operation large enough to grow the heap.
// Small inputs never grow, so this escapes naive testing.
//
// WHY WE PATCH INSTEAD OF CONFIGURING.
// This is not fixed or configurable upstream: wasm-bindgen has no wasm2js / asm.js /
// React Native / "no-WebAssembly" target (every target assumes real WebAssembly
// detach-on-grow semantics), there is no flag to force buffer-identity comparison, and
// the getter-generation logic (crates/cli-support/src/js/mod.rs `memview`) is
// byte-for-byte identical from the pinned 0.2.114 through the latest release and
// `main`. The non-detaching-grow case is not even a tracked upstream issue. Rewriting
// the generated glue is therefore the only available fix: the two replacements below
// make BOTH getters also refresh on a buffer-identity change
// (`buffer !== wasm.memory.buffer`), which is the one signal wasm2js does give.
//
// Each replacement is asserted independently. If a future wasm-bindgen reshapes one
// getter but not the other, we MUST fail the build rather than ship a half-patched
// module whose un-patched getter corrupts large inputs. The post-build self-test at
// the bottom of this file is the backstop that proves the result survives a real grow.
const bgJsPath = resolve(outDir, 'reconcile_text_bg.js');
let bgJs = readFileSync(bgJsPath, 'utf8');

// (1) Uint8Array getter: append an unconditional buffer-identity check to the
// `byteLength === 0` detach guard (upstream has no identity check here at all).
// Capture group 1 is the name of the cached view variable; the replacement re-inserts
// it as `$1` so the added check refers to the same variable.
const byteLengthGuard = /(cached\w*Memory0)\.byteLength === 0/g;
// With the `g` flag `.match()` returns every occurrence, so `.length` is the hit count;
// a `null` result (no match) is mapped to 0.
const byteLengthHits = bgJs.match(byteLengthGuard)?.length ?? 0;
if (byteLengthHits === 0) {
  throw new Error(
    `[build-rn] Could not find the Uint8Array \`byteLength === 0\` growth guard in ` +
      `${bgJsPath} to patch for wasm2js. The wasm-bindgen output shape changed; update ` +
      'this patch (see crates/cli-support/src/js/mod.rs `memview`) — do NOT ship an ' +
      'unpatched getter, it will corrupt large inputs under wasm2js.'
  );
}
bgJs = bgJs.replace(
  byteLengthGuard,
  '$1.byteLength === 0 || $1.buffer !== wasm.memory.buffer'
);

// (2) DataView getter: drop the `detached === undefined &&` prefix so the existing
// buffer-identity check runs on every runtime, not only legacy ones.
// The `\1` back-reference only matches when the same cached variable appears on both
// sides of the `&&`.
const gatedGuard =
  /(cached\w*Memory0)\.buffer\.detached === undefined && \1\.buffer !== wasm\.memory\.buffer/g;
const gatedHits = bgJs.match(gatedGuard)?.length ?? 0;
if (gatedHits === 0) {
  throw new Error(
    `[build-rn] Could not find the DataView \`detached === undefined\`-gated buffer-identity ` +
      `check in ${bgJsPath} to un-gate for wasm2js. The wasm-bindgen output shape changed; ` +
      'update this patch (see crates/cli-support/src/js/mod.rs `memview`) — do NOT ship an ' +
      'unpatched getter, it will corrupt large inputs under wasm2js.'
  );
}
bgJs = bgJs.replace(gatedGuard, '$1.buffer !== wasm.memory.buffer');

// Persist the patched glue only after both replacements were found and applied.
writeFileSync(bgJsPath, bgJs);

// Post-MVP features that wasm2js cannot translate must be lowered to MVP first.
// reference-types stays enabled: it only covers the funcref table here, which
// wasm2js handles via call_indirect.
// Features wasm-opt must accept when reading the wasm-bindgen output.
const featureFlags = [
  '--enable-bulk-memory',
  '--enable-sign-ext',
  '--enable-nontrapping-float-to-int',
  '--enable-mutable-globals',
  '--enable-reference-types',
];

console.log('[build-rn] optimising and lowering to MVP with wasm-opt');
// NOTE(review): each `*-lowering` pass presumably rewrites one of the enabled post-MVP
// features (sign-ext, bulk memory copy/fill, non-trapping float-to-int) into MVP
// instructions — confirm against the binaryen pass list when changing these flags.
run(wasmOpt, [
  ...featureFlags,
  '-O3',
  '--signext-lowering',
  '--llvm-memory-copy-fill-lowering',
  '--llvm-nontrapping-fptoint-lowering',
  bgWasm,
  '-o',
  loweredWasm,
]);

console.log('[build-rn] translating wasm -> JS with wasm2js');
run(wasm2js, ['--enable-reference-types', loweredWasm, '-o', bgWasmJs]);

console.log('[build-rn] wiring the JS translation into reconcile_text.js');
const entry = readFileSync(entryJs, 'utf8');
// Point the entry's `./reconcile_text_bg.wasm` import at the generated `.wasm.js` file.
// Group 1 captures the quote character so the rewritten specifier keeps the same quotes.
const rewired = entry.replace(
  /from\s+(['"])\.\/reconcile_text_bg\.wasm\1/,
  'from $1./reconcile_text_bg.wasm.js$1'
);
// An unchanged string means the import was not found; fail rather than ship an entry
// that still references the binary.
if (rewired === entry) {
  throw new Error(
    `Could not find the \`./reconcile_text_bg.wasm\` import in ${entryJs}; ` +
      'the wasm-bindgen bundler output layout may have changed.'
  );
}
writeFileSync(entryJs, rewired);

// The binary and the intermediate are no longer referenced; remove them so no
// bundler attempts to instantiate WebAssembly from this directory.
rmSync(bgWasm, { force: true });
rmSync(loweredWasm, { force: true });

// Mark the directory as ESM (matching the web `pkg/`) so Node and Jest treat
// these `.js` files as modules. `sideEffects` stays true because importing the
// entry runs `__wbg_set_wasm(...)`, which must not be tree-shaken away.
writeFileSync(
  resolve(outDir, 'package.json'),
  JSON.stringify({ type: 'module', sideEffects: true }, null, 2) + '\n'
);

// Backstop: import the freshly generated module and prove it survives a heap grow.
// The patches above are matched by regex against wasm-bindgen output; a silently
// mis-applied patch (or a wasm-bindgen change we matched too loosely) would leave a
// getter reading the stale pre-grow buffer and corrupt large inputs only.
Rather than +// trust the regexes, we force a grow here and assert a byte-exact round-trip, so a +// broken bundle fails the build instead of reaching a React Native consumer. +async function selfTest() { + // Importing the entry runs `__wbg_set_wasm(...)`, initialising the wasm2js module. + const api = await import(pathToFileURL(entryJs).href); + // Same module instance (Node caches by resolved path), so this `memory` is the heap + // the API operates on; its `.buffer` getter reflects the current (post-grow) buffer. + const { memory } = await import(pathToFileURL(bgWasmJs).href); + + // ~100 KB of distinct tokens. The diff working set amplifies the input many-fold + // (a ~50 KB input already forces dozens of grows), so this reliably grows the heap + // well past wasm2js's ~1 MB initial allocation while staying fast. A tiny parent + // keeps the edit distance — and therefore the runtime — small. + const tokens = []; + for (let i = 0; i < 10000; i++) { + tokens.push(`token-${i}`); + } + const target = tokens.join(' '); + const parent = 'reconcile self-test'; + + const heapBefore = memory.buffer.byteLength; + + // Stale post-grow reads surface either as an out-of-bounds throw or as silently + // wrong bytes, so handle both: a throw here is itself the failure signal. + let roundTripped; + try { + const changed = new api.TextWithCursors(target, []); + const compact = api + .diff(parent, changed, 'Word') + // This build's `undiff` rejects BigInt; normalise exactly as src/core.ts does. + .map((item) => (typeof item === 'bigint' ? Number(item) : item)); + changed.free(); + roundTripped = api.undiff(parent, compact, 'Word'); + } catch (cause) { + throw new Error( + '[build-rn] self-test crashed during a large diff/undiff round-trip (after the heap ' + + 'grew). This is the signature of unpatched wasm2js cached-memory getters reading the ' + + 'stale pre-grow buffer. The growth patch is not taking effect. 
Refusing to ship this ' + + 'React Native bundle.', + { cause } + ); + } + + const heapAfter = memory.buffer.byteLength; + + if (heapAfter <= heapBefore) { + throw new Error( + `[build-rn] self-test did not grow the wasm heap (stayed at ${heapBefore} bytes), ` + + 'so it cannot validate the memory-growth patch. Enlarge the self-test input.' + ); + } + if (roundTripped !== target) { + throw new Error( + '[build-rn] self-test FAILED: diff/undiff round-trip did not match after a heap grow. ' + + 'The patched wasm2js cached-memory getters are returning stale/corrupt data — the ' + + 'growth patch is not taking effect. Refusing to ship this React Native bundle.' + ); + } +} + +console.log('[build-rn] self-testing the patched module (forces a heap grow)'); +await selfTest(); + +console.log('[build-rn] done -> pkg-rn/'); diff --git a/reconcile-js/src/core.ts b/reconcile-js/src/core.ts new file mode 100644 index 0000000..cf2d1ec --- /dev/null +++ b/reconcile-js/src/core.ts @@ -0,0 +1,400 @@ +// Shared, platform-agnostic wrapper around the generated wasm-bindgen surface. +// +// The actual wasm bindings are injected by a platform-specific entrypoint: +// - `index.ts` (web/node) instantiates the real WebAssembly module lazily +// on first use via `initSync`. +// - `index.rn.ts` (React Native / Hermes) links a wasm2js (pure-JS) +// implementation, since Hermes does not expose a runtime +// `WebAssembly` global. See `scripts/build-rn.mjs`. + +type WasmModule = typeof import('reconcile-text'); + +/** + * The generated wasm-bindgen surface this library wraps, plus a hook to make + * sure the underlying module is ready. Supplied by a platform entrypoint. 
+ */ +export interface WasmBackend { + CursorPosition: WasmModule['CursorPosition']; + TextWithCursors: WasmModule['TextWithCursors']; + reconcile: WasmModule['reconcile']; + reconcileWithHistory: WasmModule['reconcileWithHistory']; + diff: WasmModule['diff']; + undiff: WasmModule['undiff']; + /** + * Make the wasm module ready for use. Invoked before every operation, so it + * must be cheap and idempotent (a no-op once initialised). + */ + ensureReady(): void; +} + +// Define the enum values as a const array to avoid duplication +const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Markdown', 'Word'] as const; + +/** + * Tokenisation strategies for text merging. + * + * These correspond to the built-in tokenizers available in the underlying WASM module. + */ +export type BuiltinTokenizer = (typeof BUILTIN_TOKENIZERS)[number]; + +/** + * History classification for text spans in merge results. + * + * Indicates the origin of each text span in the merged document. + */ +export type History = + | 'Unchanged' + | 'AddedFromLeft' + | 'AddedFromRight' + | 'RemovedFromLeft' + | 'RemovedFromRight'; + +/** + * Represents a text document with associated cursor positions. + * + * This interface is used both as input to reconcile functions (to specify where + * cursors are positioned in the original documents) and as output (with cursors + * automatically repositioned after merging). + */ +export interface TextWithCursors { + /** The document's entire content as a string */ + text: string; + + /** + * Array of cursor positions within the text. Can be empty if there are no cursors to track. + * Each cursor has a unique ID and position. + */ + cursors: CursorPosition[]; +} + +/** + * Like `TextWithCursors`, but cursors may be null or undefined (treated as empty). + * Used as input where cursor tracking is optional. 
+ */ +export interface TextWithOptionalCursors { + /** The document's entire content as a string */ + text: string; + + /** + * Array of cursor positions within the text. Can be null, undefined, or empty + * if there are no cursors to track. Each cursor has a unique ID and position. + */ + cursors: null | undefined | CursorPosition[]; +} + +/** + * Represents a cursor position within a text document. + * + * Cursors are automatically repositioned during text merging to maintain their + * relative positions as text is inserted, deleted, or modified around them. + */ +export interface CursorPosition { + /** Unique identifier for the cursor (can be any number, must be unique within the document) */ + id: number; + + /** Character position in the text, 0-based index from the beginning of the document */ + position: number; +} + +/** + * Represents a merged text document with cursor positions and detailed change history. + * + * This is the return type of `reconcileWithHistory()` and provides complete information + * about how the merge was performed, including which parts of the final text came from + * which source documents. + */ +export interface TextWithCursorsAndHistory { + /** The merged document's entire content */ + text: string; + + /** + * Array of cursor positions within the merged text. Can be empty if there are no cursors to track. + * All cursors are automatically repositioned from the left and right documents. + */ + cursors: CursorPosition[]; + + /** + * Detailed provenance information showing the origin of each text span in the result. + * Each span indicates whether it was unchanged, added from left, added from right, etc. + */ + history: SpanWithHistory[]; +} + +/** + * Represents a span of text in the merged result with its change history. + * + * This shows exactly which source document contributed each piece of text to the + * final merged result. 
Useful for understanding merge decisions and creating + * visualisations of how documents were combined. + */ +export interface SpanWithHistory { + /** The text content of this span */ + text: string; + + /** The origin of this text span in the merge result */ + history: History; +} + +/** The public, synchronous API surface, identical across platforms. */ +export interface ReconcileApi { + /** + * Merges three versions of text using intelligent conflict resolution. + * + * This is the primary function for 3-way text merging. Unlike traditional merge tools + * that produce conflict markers, this function automatically resolves conflicts by + * applying both sets of changes where possible. + * + * @param original - The original/base version of the text that both sides diverged from + * @param left - The left version of the text (either string or TextWithCursors with cursor positions) + * @param right - The right version of the text (either string or TextWithCursors with cursor positions) + * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose), + * "Character" (fine-grained), "Line" (similar to git merge), or + * "Markdown" (splits on Markdown structure) + * @returns The reconciled text with automatically repositioned cursor positions + * + * @example + * ```typescript + * const original = "Hello world"; + * const left = "Hello beautiful world"; // Added "beautiful" + * const right = "Hi world"; // Changed "Hello" to "Hi" + * + * const result = reconcile(original, left, right); + * console.log(result.text); // "Hi beautiful world" + * ``` + */ + reconcile( + original: string, + left: string | TextWithOptionalCursors, + right: string | TextWithOptionalCursors, + tokenizer?: BuiltinTokenizer + ): TextWithCursors; + + /** + * Generates a compact diff representation between an original and changed text. + * + * These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff. 
+ * Cursor positions are omitted from the diff result. + * + * This function computes the differences between two versions of text and returns + * a compact representation of those changes. + * + * @param original - The original/base version of the text + * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions) + * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`. + * @returns An array of inserts (strings), deletes (negative integers), and retained spans (positive integers). + */ + diff( + original: string, + changed: string | TextWithOptionalCursors, + tokenizer?: BuiltinTokenizer + ): Array; + + /** + * Applies a compact diff to an original text to reconstruct the changed version. + * + * This function takes an original text and a compact diff representation (as produced + * by the `diff` function) and reconstructs the modified text. + * + * @param original - The original/base version of the text + * @param diff - The compact diff array (inserts as strings, deletes as negative integers, retained spans as positive integers) + * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`. + * @returns The reconstructed changed text as a string. + */ + undiff( + original: string, + diff: Array, + tokenizer?: BuiltinTokenizer + ): string; + + /** + * Merges three versions of text and returns detailed provenance information. + * + * This function behaves like `reconcile()` but also provides + * detailed historical information about the origin of each text span in the result. + * This is valuable for understanding how the merge was performed and which changes + * came from which source. + * + * Note: Computing the history is computationally more expensive than the basic merge. 
+ * + * @param original - The original/base version of the text that both sides diverged from + * @param left - The left version of the text (either string or TextWithCursors with cursor positions) + * @param right - The right version of the text (either string or TextWithCursors with cursor positions) + * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose), + * "Character" (fine-grained), "Line" (similar to git merge), or + * "Markdown" (splits on Markdown structure) + * @returns The reconciled text with cursor positions and detailed change history + * + * @example + * ```typescript + * const original = "Hello world"; + * const left = "Hello beautiful world"; + * const right = "Hi world"; + * + * const result = reconcileWithHistory(original, left, right); + * console.log(result.text); // "Hi beautiful world" + * console.log(result.history); // Array of SpanWithHistory objects showing change origins + * ``` + */ + reconcileWithHistory( + original: string, + left: string | TextWithOptionalCursors, + right: string | TextWithOptionalCursors, + tokenizer?: BuiltinTokenizer + ): TextWithCursorsAndHistory; +} + +const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer, only ${BUILTIN_TOKENIZERS.join( + ', ' +)} are supported`; + +/** + * Build the public {@link ReconcileApi} on top of a {@link WasmBackend}. + * + * Each operation calls `backend.ensureReady()` first, then marshals JS values + * into the wasm representation, invokes the binding, and frees the wasm-side + * objects. The behaviour is identical regardless of whether the backend is a + * real WebAssembly module or its wasm2js translation. 
+ */ +export function makeReconcileApi(backend: WasmBackend): ReconcileApi { + function assertTokenizer(tokenizer: BuiltinTokenizer): void { + if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { + throw new Error(UNSUPPORTED_TOKENIZER_ERROR); + } + } + + function toWasmTextWithCursors(text: string | TextWithOptionalCursors) { + const isInputString = typeof text === 'string'; + const innerText = isInputString ? text : text.text; + const innerCursors = isInputString ? [] : (text.cursors ?? []); + + return new backend.TextWithCursors( + innerText, + innerCursors.map(({ id, position }) => new backend.CursorPosition(id, position)) + ); + } + + function toTextWithCursors(textWithCursor: { + text(): string; + cursors(): Array<{ id(): number; characterIndex(): number; free(): void }>; + }): TextWithCursors { + const wasmCursors = textWithCursor.cursors(); + const cursors = wasmCursors.map((cursor) => ({ + id: cursor.id(), + position: cursor.characterIndex(), + })); + for (const cursor of wasmCursors) { + cursor.free(); + } + + return { + text: textWithCursor.text(), + cursors, + }; + } + + function toSpanWithHistory(span: { + text(): string; + history(): History; + free(): void; + }): SpanWithHistory { + const result = { + text: span.text(), + history: span.history(), + }; + span.free(); + return result; + } + + function reconcile( + original: string, + left: string | TextWithOptionalCursors, + right: string | TextWithOptionalCursors, + tokenizer: BuiltinTokenizer = 'Word' + ): TextWithCursors { + backend.ensureReady(); + assertTokenizer(tokenizer); + + const leftCursor = toWasmTextWithCursors(left); + const rightCursor = toWasmTextWithCursors(right); + + const result = backend.reconcile(original, leftCursor, rightCursor, tokenizer); + + leftCursor.free(); + rightCursor.free(); + + const jsResult = toTextWithCursors(result); + result.free(); + + return jsResult; + } + + function diff( + original: string, + changed: string | TextWithOptionalCursors, + tokenizer: BuiltinTokenizer 
= 'Word' + ): Array { + backend.ensureReady(); + assertTokenizer(tokenizer); + + const changedWasm = toWasmTextWithCursors(changed); + + const result = backend.diff(original, changedWasm, tokenizer); + + changedWasm.free(); + + return result.map((item) => (typeof item === 'bigint' ? Number(item) : item)); + } + + function undiff( + original: string, + diffValue: Array, + tokenizer: BuiltinTokenizer = 'Word' + ): string { + backend.ensureReady(); + assertTokenizer(tokenizer); + + // The real-WebAssembly backend's `diff` emits BigInt spans, whereas the + // wasm2js (React Native) backend rejects BigInt outright. Normalise to + // plain numbers - exactly as `diff` does on the way out - so a `diff` + // result round-trips through `undiff` identically on every platform. + return backend.undiff( + original, + diffValue.map((item) => (typeof item === 'bigint' ? Number(item) : item)), + tokenizer + ); + } + + function reconcileWithHistory( + original: string, + left: string | TextWithOptionalCursors, + right: string | TextWithOptionalCursors, + tokenizer: BuiltinTokenizer = 'Word' + ): TextWithCursorsAndHistory { + backend.ensureReady(); + assertTokenizer(tokenizer); + + const leftCursor = toWasmTextWithCursors(left); + const rightCursor = toWasmTextWithCursors(right); + + const result = backend.reconcileWithHistory( + original, + leftCursor, + rightCursor, + tokenizer + ); + + leftCursor.free(); + rightCursor.free(); + + const jsResult = toTextWithCursors(result); + const history = result.history().map(toSpanWithHistory); + result.free(); + + return { + ...jsResult, + history, + }; + } + + return { reconcile, diff, undiff, reconcileWithHistory }; +} diff --git a/reconcile-js/src/index.rn.ts b/reconcile-js/src/index.rn.ts new file mode 100644 index 0000000..1487a59 --- /dev/null +++ b/reconcile-js/src/index.rn.ts @@ -0,0 +1,47 @@ +// React Native entrypoint (resolved via the `react-native` package field). 
+// +// Hermes — the default React Native engine since RN 0.84 / Expo SDK 56 — does +// not expose a runtime `WebAssembly` global, so the normal `new +// WebAssembly.Module(...)` path used by `index.ts` throws +// `ReferenceError: Property 'WebAssembly' doesn't exist`. +// +// Instead we link a wasm2js translation of the module: pure JavaScript that +// needs no `WebAssembly` global and is instantiated synchronously at import +// time. The public API and its synchronous signatures are unchanged, so +// callers need no modification. The `pkg-rn` directory is generated by +// `scripts/build-rn.mjs`. + +import { + CursorPosition as wasmCursorPosition, + TextWithCursors as wasmTextWithCursors, + reconcile as wasmReconcile, + reconcileWithHistory as wasmReconcileWithHistory, + diff as wasmDiff, + undiff as wasmUndiff, +} from '../pkg-rn/reconcile_text.js'; + +import { makeReconcileApi, type WasmBackend } from './core'; + +const backend: WasmBackend = { + CursorPosition: wasmCursorPosition, + TextWithCursors: wasmTextWithCursors, + reconcile: wasmReconcile, + reconcileWithHistory: wasmReconcileWithHistory, + diff: wasmDiff, + undiff: wasmUndiff, + // The wasm2js module initialises itself at import time, so this is a no-op. 
+ ensureReady() {}, +}; + +export const { reconcile, diff, undiff, reconcileWithHistory } = + makeReconcileApi(backend); + +export type { + BuiltinTokenizer, + History, + CursorPosition, + TextWithCursors, + TextWithOptionalCursors, + TextWithCursorsAndHistory, + SpanWithHistory, +} from './core'; diff --git a/reconcile-js/src/index.test.ts b/reconcile-js/src/index.test.ts index 0de924c..66d385b 100644 --- a/reconcile-js/src/index.test.ts +++ b/reconcile-js/src/index.test.ts @@ -1,4 +1,5 @@ -import { reconcile, reconcileWithHistory, diff, undiff } from './index'; +import * as webApi from './index'; +import * as rnApi from './index.rn'; import { installWasmLeakDetector, checkForWasmLeaks } from './wasm-leak-detector'; import * as fs from 'fs'; import * as path from 'path'; @@ -17,7 +18,18 @@ afterEach(() => { } }); -describe('reconcile', () => { +// `./index` is the web/node build (real WebAssembly); `./index.rn` is the React +// Native build (the wasm2js pure-JS translation). Both are thin backends over the +// same `src/core.ts` wrapper and expose an identical public API, so the behavioural +// suite below runs against both to guarantee they stay in lock-step. +const backends = [ + { name: 'web/node (WebAssembly)', api: webApi }, + { name: 'React Native (wasm2js)', api: rnApi }, +]; + +describe.each(backends)('reconcile [$name]', ({ api }) => { + const { reconcile, reconcileWithHistory, diff, undiff } = api; + it('call reconcile without cursors', () => { expect(reconcile('Hello', 'Hello world', 'Hi world').text).toEqual('Hi world'); }); @@ -60,9 +72,26 @@ describe('reconcile', () => { expect(result.text).toEqual('Hi world'); expect(result.history.length).toBeGreaterThan(0); }); + + it('undiff accepts bigint entries (per the Array type)', () => { + const original = 'Hello world'; + const changed = 'Hello cruel world'; + + // `diff` returns plain numbers; emulate a caller that supplies BigInt, which the + // public signature permits. 
The wasm2js build rejects raw BigInt, so the shared + // wrapper must normalise it — running this on both backends asserts the contract. + const withBigints = diff(original, changed).map((item) => + typeof item === 'number' ? BigInt(item) : item + ); + + expect(withBigints.some((item) => typeof item === 'bigint')).toBe(true); + expect(undiff(original, withBigints)).toEqual(changed); + }); }); -describe('test_diff_and_undiff_are_inverse', () => { +describe.each(backends)('diff and undiff are inverse [$name]', ({ api }) => { + const { diff, undiff } = api; + const resourcesPath = path.join(__dirname, '../../tests/resources'); const readFileSlice = (fileName: string, start: number, end: number): string => { @@ -93,3 +122,31 @@ describe('test_diff_and_undiff_are_inverse', () => { }); }); }); + +// React-Native-only: Hermes exposes no `WebAssembly` global, which is the whole reason +// the RN entry point links a wasm2js build. Only the wasm2js backend can satisfy this. +describe('React Native (wasm2js) Hermes parity', () => { + const { reconcile, reconcileWithHistory, diff, undiff } = rnApi; + + it('runs every operation with no WebAssembly global', () => { + const descriptor = Object.getOwnPropertyDescriptor(globalThis, 'WebAssembly'); + delete (globalThis as { WebAssembly?: unknown }).WebAssembly; + try { + expect((globalThis as { WebAssembly?: unknown }).WebAssembly).toBeUndefined(); + + expect(reconcile('Hello', 'Hello world', 'Hi world').text).toEqual('Hi world'); + + const changes = diff('Hello world', 'Hello cruel world'); + expect(undiff('Hello world', changes)).toEqual('Hello cruel world'); + + expect( + reconcileWithHistory('Hello', 'Hello world', 'Hi world').history.length + ).toBeGreaterThan(0); + } finally { + // Restore the global so the leak check and later suites are unaffected. 
+ if (descriptor) { + Object.defineProperty(globalThis, 'WebAssembly', descriptor); + } + } + }); +}); diff --git a/reconcile-js/src/index.ts b/reconcile-js/src/index.ts index d00051c..7371169 100644 --- a/reconcile-js/src/index.ts +++ b/reconcile-js/src/index.ts @@ -1,8 +1,7 @@ import { CursorPosition as wasmCursorPosition, - reconcile as wasmReconcile, TextWithCursors as wasmTextWithCursors, - SpanWithHistory as wasmSpanWithHistory, + reconcile as wasmReconcile, reconcileWithHistory as wasmReconcileWithHistory, diff as wasmDiff, undiff as wasmUndiff, @@ -11,341 +10,40 @@ import { import wasmBytes from 'reconcile-text/reconcile_text_bg.wasm'; -// Define the enum values as const arrays to avoid duplication -const BUILTIN_TOKENIZERS = ['Character', 'Line', 'Markdown', 'Word'] as const; -const HISTORY_VALUES = [ - 'Unchanged', - 'AddedFromLeft', - 'AddedFromRight', - 'RemovedFromLeft', - 'RemovedFromRight', -] as const; - -/** - * Tokenisation strategies for text merging. - * - * These correspond to the built-in tokenizers available in the underlying WASM module. - */ -export type BuiltinTokenizer = (typeof BUILTIN_TOKENIZERS)[number]; - -/** - * History classification for text spans in merge results. - * - * Indicates the origin of each text span in the merged document. - */ -export type History = (typeof HISTORY_VALUES)[number]; - -/** - * Represents a text document with associated cursor positions. - * - * This interface is used both as input to reconcile functions (to specify where - * cursors are positioned in the original documents) and as output (with cursors - * automatically repositioned after merging). - */ -export interface TextWithCursors { - /** The document's entire content as a string */ - text: string; - - /** - * Array of cursor positions within the text. Can be empty if there are no cursors to track. - * Each cursor has a unique ID and position. 
- */ - cursors: CursorPosition[]; -} - -/** - * Like `TextWithCursors`, but cursors may be null or undefined (treated as empty). - * Used as input where cursor tracking is optional. - */ -export interface TextWithOptionalCursors { - /** The document's entire content as a string */ - text: string; - - /** - * Array of cursor positions within the text. Can be null, undefined, or empty - * if there are no cursors to track. Each cursor has a unique ID and position. - */ - cursors: null | undefined | CursorPosition[]; -} - -/** - * Represents a cursor position within a text document. - * - * Cursors are automatically repositioned during text merging to maintain their - * relative positions as text is inserted, deleted, or modified around them. - */ -export interface CursorPosition { - /** Unique identifier for the cursor (can be any number, must be unique within the document) */ - id: number; - - /** Character position in the text, 0-based index from the beginning of the document */ - position: number; -} - -/** - * Represents a merged text document with cursor positions and detailed change history. - * - * This is the return type of `reconcileWithHistory()` and provides complete information - * about how the merge was performed, including which parts of the final text came from - * which source documents. - */ -export interface TextWithCursorsAndHistory { - /** The merged document's entire content */ - text: string; - - /** - * Array of cursor positions within the merged text. Can be empty if there are no cursors to track. - * All cursors are automatically repositioned from the left and right documents. - */ - cursors: CursorPosition[]; - - /** - * Detailed provenance information showing the origin of each text span in the result. - * Each span indicates whether it was unchanged, added from left, added from right, etc. - */ - history: SpanWithHistory[]; -} - -/** - * Represents a span of text in the merged result with its change history. 
- * - * This shows exactly which source document contributed each piece of text to the - * final merged result. Useful for understanding merge decisions and creating - * visualisations of how documents were combined. - */ -export interface SpanWithHistory { - /** The text content of this span */ - text: string; - - /** The origin of this text span in the merge result */ - history: History; -} - -const UNSUPPORTED_TOKENIZER_ERROR = `Unsupported tokenizer, only ${BUILTIN_TOKENIZERS.join( - ', ' -)} are supported`; +import { makeReconcileApi, type WasmBackend } from './core'; let isInitialised = false; -/** - * Merges three versions of text using intelligent conflict resolution. - * - * This is the primary function for 3-way text merging. Unlike traditional merge tools - * that produce conflict markers, this function automatically resolves conflicts by - * applying both sets of changes where possible. - * - * @param original - The original/base version of the text that both sides diverged from - * @param left - The left version of the text (either string or TextWithCursors with cursor positions) - * @param right - The right version of the text (either string or TextWithCursors with cursor positions) - * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose), - * "Character" (fine-grained), or "Line" (similar to git merge) - * @returns The reconciled text with automatically repositioned cursor positions - * - * @example - * ```typescript - * const original = "Hello world"; - * const left = "Hello beautiful world"; // Added "beautiful" - * const right = "Hi world"; // Changed "Hello" to "Hi" - * - * const result = reconcile(original, left, right); - * console.log(result.text); // "Hi beautiful world" - * ``` - */ -export function reconcile( - original: string, - left: string | TextWithOptionalCursors, - right: string | TextWithOptionalCursors, - tokenizer: BuiltinTokenizer = 'Word' -): TextWithCursors { - init(); +const backend: 
WasmBackend = { + CursorPosition: wasmCursorPosition, + TextWithCursors: wasmTextWithCursors, + reconcile: wasmReconcile, + reconcileWithHistory: wasmReconcileWithHistory, + diff: wasmDiff, + undiff: wasmUndiff, + ensureReady() { + if (isInitialised) { + return; + } - if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { - throw new Error(UNSUPPORTED_TOKENIZER_ERROR); - } + const wasmBinary = Uint8Array.from(atob(wasmBytes as unknown as string), (c) => + c.charCodeAt(0) + ); + initSync({ module: wasmBinary }); - const leftCursor = toWasmTextWithCursors(left); - const rightCursor = toWasmTextWithCursors(right); + isInitialised = true; + }, +}; - const result = wasmReconcile(original, leftCursor, rightCursor, tokenizer); +export const { reconcile, diff, undiff, reconcileWithHistory } = + makeReconcileApi(backend); - leftCursor.free(); - rightCursor.free(); - - const jsResult = toTextWithCursors(result); - result.free(); - - return jsResult; -} - -/** - * Generates a compact diff representation between an original and changed text. - * - * These can be parsed and unpacked using the `undiff` function or the Rust crate's EditedText::from_diff. - * Cursor positions are omitted from the diff result. - * - * This function computes the differences between two versions of text and returns - * a compact representation of those changes. - * - * @param original - The original/base version of the text - * @param changed - The modified version of the text (either string or TextWithCursors with cursor positions) - * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`. - * @returns An array of inserts (strings), deletes (negative integers), and retained spans (positive integers). 
- */ -export function diff( - original: string, - changed: string | TextWithOptionalCursors, - tokenizer: BuiltinTokenizer = 'Word' -): Array { - init(); - - if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { - throw new Error(UNSUPPORTED_TOKENIZER_ERROR); - } - - const changedWasm = toWasmTextWithCursors(changed); - - const result = wasmDiff(original, changedWasm, tokenizer); - - changedWasm.free(); - - return result.map((item) => (typeof item === 'bigint' ? Number(item) : item)); -} - -/** - * Applies a compact diff to an original text to reconstruct the changed version. - * - * This function takes an original text and a compact diff representation (as produced - * by the `diff` function) and reconstructs the modified text. - * - * @param original - The original/base version of the text - * @param diff - The compact diff array (inserts as strings, deletes as negative integers, retained spans as positive integers) - * @param tokenizer - The tokenisation strategy, which is the same as used in `reconcile`. - * @returns The reconstructed changed text as a string. - */ -export function undiff( - original: string, - diff: Array, - tokenizer: BuiltinTokenizer = 'Word' -): string { - init(); - - if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { - throw new Error(UNSUPPORTED_TOKENIZER_ERROR); - } - - return wasmUndiff(original, diff, tokenizer); -} - -/** - * Merges three versions of text and returns detailed provenance information. - * - * This function behaves like `reconcile()` but also provides - * detailed historical information about the origin of each text span in the result. - * This is valuable for understanding how the merge was performed and which changes - * came from which source. - * - * Note: Computing the history is computationally more expensive than the basic merge. 
- * - * @param original - The original/base version of the text that both sides diverged from - * @param left - The left version of the text (either string or TextWithCursors with cursor positions) - * @param right - The right version of the text (either string or TextWithCursors with cursor positions) - * @param tokenizer - The tokenisation strategy: "Word" (default, recommended for prose), - * "Character" (fine-grained), or "Line" (similar to git merge) - * @returns The reconciled text with cursor positions and detailed change history - * - * @example - * ```typescript - * const original = "Hello world"; - * const left = "Hello beautiful world"; - * const right = "Hi world"; - * - * const result = reconcileWithHistory(original, left, right); - * console.log(result.text); // "Hi beautiful world" - * console.log(result.history); // Array of SpanWithHistory objects showing change origins - * ``` - */ -export function reconcileWithHistory( - original: string, - left: string | TextWithOptionalCursors, - right: string | TextWithOptionalCursors, - tokenizer: BuiltinTokenizer = 'Word' -): TextWithCursorsAndHistory { - init(); - - if (!BUILTIN_TOKENIZERS.includes(tokenizer)) { - throw new Error(UNSUPPORTED_TOKENIZER_ERROR); - } - - const leftCursor = toWasmTextWithCursors(left); - const rightCursor = toWasmTextWithCursors(right); - - const result = wasmReconcileWithHistory(original, leftCursor, rightCursor, tokenizer); - - leftCursor.free(); - rightCursor.free(); - - const jsResult = toTextWithCursors(result); - const history = result.history().map(toSpanWithHistory); - result.free(); - - return { - ...jsResult, - history, - }; -} - -function init() { - if (isInitialised) { - return; - } - - const wasmBinary = Uint8Array.from(atob(wasmBytes as unknown as string), (c) => - c.charCodeAt(0) - ); - initSync({ module: wasmBinary }); - - isInitialised = true; -} - -function toWasmTextWithCursors( - text: string | TextWithOptionalCursors -): wasmTextWithCursors { - const 
isInputString = typeof text === 'string'; - const leftText = isInputString ? text : text.text; - const leftCursors = isInputString ? [] : (text.cursors ?? []); - - return new wasmTextWithCursors(leftText, leftCursors.map(toWasmCursorPosition)); -} - -function toWasmCursorPosition({ id, position }: CursorPosition): wasmCursorPosition { - return new wasmCursorPosition(id, position); -} - -function toTextWithCursors(textWithCursor: wasmTextWithCursors): TextWithCursors { - const wasmCursors = textWithCursor.cursors(); - const cursors = wasmCursors.map(toCursorPosition); - for (const cursor of wasmCursors) { - cursor.free(); - } - - return { - text: textWithCursor.text(), - cursors, - }; -} - -function toCursorPosition(cursor: wasmCursorPosition): CursorPosition { - return { - id: cursor.id(), - position: cursor.characterIndex(), - }; -} - -function toSpanWithHistory(span: wasmSpanWithHistory): SpanWithHistory { - const result = { - text: span.text(), - history: span.history(), - }; - span.free(); - return result; -} +export type { + BuiltinTokenizer, + History, + CursorPosition, + TextWithCursors, + TextWithOptionalCursors, + TextWithCursorsAndHistory, + SpanWithHistory, +} from './core'; diff --git a/reconcile-js/webpack.config.js b/reconcile-js/webpack.config.js index bf126fa..280bc52 100644 --- a/reconcile-js/webpack.config.js +++ b/reconcile-js/webpack.config.js @@ -2,7 +2,6 @@ const path = require('path'); const { merge } = require('webpack-merge'); const common = { - entry: './src/index.ts', optimization: { // the consuming project should take care of minification minimize: false, @@ -38,8 +37,10 @@ const common = { }; module.exports = [ + // Web build: real WebAssembly, instantiated synchronously from inlined base64. merge(common, { target: 'web', + entry: './src/index.ts', output: { path: path.resolve(__dirname, 'dist'), filename: 'reconcile.web.js', @@ -50,12 +51,31 @@ module.exports = [ globalObject: 'this', }, }), + + // Node build: real WebAssembly. 
merge(common, { target: 'node', + entry: './src/index.ts', output: { path: path.resolve(__dirname, 'dist'), filename: 'reconcile.node.js', libraryTarget: 'commonjs2', }, }), + + // React Native build: wasm2js (pure JS), for Hermes which has no + // `WebAssembly` global. Sources come from `pkg-rn/` + merge(common, { + target: 'web', + entry: './src/index.rn.ts', + output: { + path: path.resolve(__dirname, 'dist'), + filename: 'reconcile.rn.js', + library: { + name: 'reconcile', + type: 'umd', + }, + globalObject: 'this', + }, + }), ]; diff --git a/reconcile-python/Cargo.lock b/reconcile-python/Cargo.lock index 8e9b36c..07e6da6 100644 --- a/reconcile-python/Cargo.lock +++ b/reconcile-python/Cargo.lock @@ -104,14 +104,14 @@ dependencies = [ [[package]] name = "reconcile-text" -version = "0.9.4" +version = "0.12.0" dependencies = [ "thiserror", ] [[package]] name = "reconcile-text-python" -version = "0.9.4" +version = "0.12.0" dependencies = [ "pyo3", "reconcile-text", diff --git a/reconcile-python/Cargo.toml b/reconcile-python/Cargo.toml index db78e3a..0ecdbf6 100644 --- a/reconcile-python/Cargo.toml +++ b/reconcile-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "reconcile-text-python" -version = "0.9.4" +version = "0.12.0" edition = "2024" rust-version = "1.94" authors = ["Andras Schmelczer "] @@ -13,4 +13,4 @@ crate-type = ["cdylib"] [dependencies] reconcile-text = { path = ".." 
} -pyo3 = { version = "0.28.2", features = ["extension-module"] } +pyo3 = { version = "0.28.2", features = ["extension-module", "abi3-py39"] } diff --git a/reconcile-python/pyproject.toml b/reconcile-python/pyproject.toml index 75d91cf..ba367f2 100644 --- a/reconcile-python/pyproject.toml +++ b/reconcile-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "reconcile-text" -version = "0.9.4" +version = "0.12.0" description = "Intelligent 3-way text merging with automated conflict resolution" readme = "README.md" license = { text = "MIT" } diff --git a/reconcile-python/uv.lock b/reconcile-python/uv.lock index de403a2..8a3d3a4 100644 --- a/reconcile-python/uv.lock +++ b/reconcile-python/uv.lock @@ -168,7 +168,7 @@ wheels = [ [[package]] name = "reconcile-text" -version = "0.9.4" +version = "0.12.0" source = { editable = "." } [package.dev-dependencies] diff --git a/scripts/lint.sh b/scripts/lint.sh index c46991d..6ae4f66 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -5,9 +5,6 @@ set -e which cargo-machete || cargo install cargo-machete cargo machete -which lychee || cargo install lychee -lychee --verbose --exclude npmjs.com README.md - cargo clippy --all-targets --all-features --fix --allow-dirty --allow-staged cargo fmt --all diff --git a/src/tokenizer/token.rs b/src/tokenizer/token.rs index 5d82eb5..0f45d41 100644 --- a/src/tokenizer/token.rs +++ b/src/tokenizer/token.rs @@ -1,4 +1,7 @@ -use std::fmt::Debug; +use std::{ + fmt::Debug, + hash::{Hash, Hasher}, +}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -78,3 +81,14 @@ where self.normalized == other.normalized } } + +/// Hashes based on the `normalized` field only, consistent with the +/// [`PartialEq`] implementation. 
+impl Hash for Token +where + T: PartialEq + Clone + Debug + Hash, +{ + fn hash(&self, state: &mut H) { + self.normalized.hash(state); + } +} diff --git a/src/utils/myers_diff.rs b/src/utils/myers_diff.rs index b9a8b25..f04df56 100644 --- a/src/utils/myers_diff.rs +++ b/src/utils/myers_diff.rs @@ -11,13 +11,11 @@ //! The implementation of this algorithm is based on the implementation by //! Brandon Williams. //! -//! # Heuristics +//! # Complexity //! -//! At present this implementation of Myers' does not implement any more -//! advanced heuristics that would solve some pathological cases. For instance -//! passing two large and completely distinct sequences to the algorithm will -//! make it spin without making reasonable progress. -//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15). +//! The worst case (completely dissimilar inputs) is `O((N+M)²)` time. In +//! practice the divide-and-conquer strategy with prefix/suffix stripping keeps +//! subproblems small for typical text. use std::{ fmt::Debug, @@ -41,26 +39,21 @@ pub fn myers_diff(old: &[Token], new: &[Token]) -> Vec> where T: PartialEq + Clone + Debug, { - let max_d = (old.len() + new.len()).div_ceil(2) + 1; - let mut vb = V::new(max_d); - let mut vf = V::new(max_d); - let mut result = Vec::new(); + let max_edit_distance = (old.len() + new.len()).div_ceil(2) + 1; + let mut backward_endpoints = FurthestEndpoints::new(max_edit_distance); + let mut forward_endpoints = FurthestEndpoints::new(max_edit_distance); + let mut result = Vec::with_capacity(old.len() + new.len()); conquer( old, 0..old.len(), new, 0..new.len(), - &mut vf, - &mut vb, + &mut forward_endpoints, + &mut backward_endpoints, &mut result, ); - debug_assert!( - result.iter().all(|op| op.tokens().len() == 1), - "All operations must be of length 1" - ); - result } @@ -68,50 +61,52 @@ where // edges. 
All D-paths consist of a (D - 1)-path followed by a non-diagonal edge // and then a possibly empty sequence of diagonal edges called a snake. -/// `V` contains the endpoints of the furthest reaching `D-paths`. For each -/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x` -/// because `y` can be computed from `x - k`. In other words, `V` is an array of -/// integers where `V[k]` contains the row index of the endpoint of the furthest -/// reaching path in diagonal `k`. +/// Contains the endpoints of the furthest reaching `D-paths`. For each +/// recorded endpoint `(x, y)` on diagonal `k`, we only need to retain `x` +/// because `y` can be computed from `x - k`. In other words, this is an array +/// of integers where `endpoints[k]` contains the row index of the endpoint of +/// the furthest reaching path on diagonal `k`. /// -/// We can't use a traditional Vec to represent `V` since we use `k` as an index -/// and it can take on negative values. So instead `V` is represented as a -/// light-weight wrapper around a Vec plus an `offset` which is the maximum -/// value `k` can take on to map negative `k`'s back to a value >= 0. +/// We can't use a traditional Vec since we use `k` as an index and it can take +/// on negative values. So instead this is a light-weight wrapper around a Vec +/// plus an `offset` which is the maximum value `k` can take on, used to map +/// negative `k`'s back to a value >= 0. 
#[derive(Debug)] -struct V { +struct FurthestEndpoints { offset: isize, - v: Vec, + endpoints: Vec, } -impl V { - fn new(max_d: usize) -> Self { - // max_d should fit in isize for the algorithm to work correctly - let offset = isize::try_from(max_d).expect("max_d must fit in isize"); +impl FurthestEndpoints { + fn new(max_edit_distance: usize) -> Self { + let offset = + isize::try_from(max_edit_distance).expect("max_edit_distance must fit in isize"); Self { offset, - v: vec![0; 2 * max_d + 1], + endpoints: vec![0; 2 * max_edit_distance + 1], } } fn len(&self) -> usize { - self.v.len() + self.endpoints.len() } } -impl Index for V { +impl Index for FurthestEndpoints { type Output = usize; - fn index(&self, index: isize) -> &Self::Output { - let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize"); - &self.v[idx] + fn index(&self, diagonal: isize) -> &Self::Output { + let idx = + usize::try_from(diagonal + self.offset).expect("diagonal + offset must fit in usize"); + &self.endpoints[idx] } } -impl IndexMut for V { - fn index_mut(&mut self, index: isize) -> &mut Self::Output { - let idx = usize::try_from(index + self.offset).expect("index + offset must fit in usize"); - &mut self.v[idx] +impl IndexMut for FurthestEndpoints { + fn index_mut(&mut self, diagonal: isize) -> &mut Self::Output { + let idx = + usize::try_from(diagonal + self.offset).expect("diagonal + offset must fit in usize"); + &mut self.endpoints[idx] } } @@ -119,6 +114,26 @@ fn split_at(range: Range, at: usize) -> (Range, Range) { (range.start..at, at..range.end) } +/// Adjust a lower diagonal bound so it has the same parity as `edit_distance`. +/// Diagonals are visited in steps of 2, so `lower` must share `edit_distance`'s +/// parity. +fn align_lower_bound(lower: isize, edit_distance: isize) -> isize { + if (lower & 1) == (edit_distance & 1) { + lower + } else { + lower + 1 + } +} + +/// Adjust an upper diagonal bound so it has the same parity as `edit_distance`. 
+fn align_upper_bound(upper: isize, edit_distance: isize) -> isize { + if (upper & 1) == (edit_distance & 1) { + upper + } else { + upper - 1 + } +} + /// A `Snake` is a sequence of diagonal edges in the edit graph. Normally /// a snake has a start end end point (and it is possible for a snake to have /// a length of zero, meaning the start and end points are the same) however @@ -135,106 +150,143 @@ fn find_middle_snake( old_range: Range, new: &[Token], new_range: Range, - vf: &mut V, - vb: &mut V, + forward_endpoints: &mut FurthestEndpoints, + backward_endpoints: &mut FurthestEndpoints, ) -> Option<(usize, usize)> where T: PartialEq + Clone + Debug, { - let n = old_range.len(); - let m = new_range.len(); + let old_len = old_range.len(); + let new_len = new_range.len(); + + let old_len_signed = isize::try_from(old_len).expect("old_len must fit in isize"); + let new_len_signed = isize::try_from(new_len).expect("new_len must fit in isize"); // By Lemma 1 in the paper, the optimal edit script length is odd or even as // `delta` is odd or even. 
- let delta = isize::try_from(n).expect("n must fit in isize") - - isize::try_from(m).expect("m must fit in isize"); - let odd = delta & 1 == 1; + let delta = old_len_signed - new_len_signed; + let delta_is_odd = delta & 1 == 1; // The initial point at (0, -1) - vf[1] = 0; + forward_endpoints[1] = 0; // The initial point at (N, M+1) - vb[1] = 0; + backward_endpoints[1] = 0; - let d_max = (n + m).div_ceil(2) + 1; - assert!(vf.len() >= d_max); - assert!(vb.len() >= d_max); + let max_edit_distance = (old_len + new_len).div_ceil(2) + 1; + assert!(forward_endpoints.len() >= max_edit_distance); + assert!(backward_endpoints.len() >= max_edit_distance); + + let max_edit_distance_signed = + isize::try_from(max_edit_distance).expect("max_edit_distance must fit in isize"); + + for edit_distance in 0..max_edit_distance_signed { + // Tighter diagonal bounds: on diagonal k = x - y the constraints + // 0 <= x <= old_len and 0 <= y <= new_len give k in [-new_len, old_len]. + // Intersect with the algorithm's [-edit_distance, edit_distance] + // range and snap to the correct parity (k advances in steps of 2). 
+ let forward_diagonal_lo = + align_lower_bound((-edit_distance).max(-new_len_signed), edit_distance); + let forward_diagonal_hi = + align_upper_bound(edit_distance.min(old_len_signed), edit_distance); - let d_max_isize = isize::try_from(d_max).expect("d_max must fit in isize"); - for d in 0..d_max_isize { // Forward path - for k in (-d..=d).rev().step_by(2) { - let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { - vf[k + 1] + for diagonal in (forward_diagonal_lo..=forward_diagonal_hi).rev().step_by(2) { + let mut old_idx = if diagonal == -edit_distance + || (diagonal != edit_distance + && forward_endpoints[diagonal - 1] < forward_endpoints[diagonal + 1]) + { + forward_endpoints[diagonal + 1] } else { - vf[k - 1] + 1 + forward_endpoints[diagonal - 1] + 1 }; - let y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k) - .expect("x - k must be non-negative and fit in usize"); + let new_idx = usize::try_from( + isize::try_from(old_idx).expect("old_idx must fit in isize") - diagonal, + ) + .expect("old_idx - diagonal must be non-negative and fit in usize"); // The coordinate of the start of a snake - let (x0, y0) = (x, y); - // While these sequences are identical, keep moving through the - // graph with no cost - if x < old_range.len() && y < new_range.len() { + let (snake_start_old, snake_start_new) = (old_idx, new_idx); + + // While these sequences are identical, keep moving through the + // graph with no cost + if old_idx < old_range.len() && new_idx < new_range.len() { let advance = common_prefix_len( old, - old_range.start + x..old_range.end, + old_range.start + old_idx..old_range.end, new, - new_range.start + y..new_range.end, + new_range.start + new_idx..new_range.end, ); - x += advance; + old_idx += advance; } // This is the new best x value - vf[k] = x; + forward_endpoints[diagonal] = old_idx; // Only check for connections from the forward search when N - M is // odd and when there is a reciprocal k line coming from the other - // 
direction. - if odd && (k - delta).abs() <= (d - 1) { - // TODO optimise this so we don't have to compare against n - if vf[k] + vb[-(k - delta)] >= n { - // Return the snake - return Some((x0 + old_range.start, y0 + new_range.start)); - } + // direction. Forward diagonal k maps to backward diagonal + // (delta - k). Overlap occurs when the combined forward + backward + // reach covers the full width: + // forward_endpoints[k] + backward_endpoints[delta - k] >= old_len. + if delta_is_odd + && (diagonal - delta).abs() <= (edit_distance - 1) + && forward_endpoints[diagonal] + backward_endpoints[-(diagonal - delta)] >= old_len + { + return Some(( + snake_start_old + old_range.start, + snake_start_new + new_range.start, + )); } } - // Backward path - for k in (-d..=d).rev().step_by(2) { - let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { - vb[k + 1] - } else { - vb[k - 1] + 1 - }; - let mut y = usize::try_from(isize::try_from(x).expect("x must fit in isize") - k) - .expect("x - k must be non-negative and fit in usize"); + let backward_diagonal_lo = + align_lower_bound((-edit_distance).max(-new_len_signed), edit_distance); + let backward_diagonal_hi = + align_upper_bound(edit_distance.min(old_len_signed), edit_distance); - // The coordinate of the start of a snake - if x < n && y < m { + // Backward path + for diagonal in (backward_diagonal_lo..=backward_diagonal_hi) + .rev() + .step_by(2) + { + let mut old_idx = if diagonal == -edit_distance + || (diagonal != edit_distance + && backward_endpoints[diagonal - 1] < backward_endpoints[diagonal + 1]) + { + backward_endpoints[diagonal + 1] + } else { + backward_endpoints[diagonal - 1] + 1 + }; + let mut new_idx = usize::try_from( + isize::try_from(old_idx).expect("old_idx must fit in isize") - diagonal, + ) + .expect("old_idx - diagonal must be non-negative and fit in usize"); + + // Extend the snake backward (matching suffix) + if old_idx < old_len && new_idx < new_len { let advance = common_suffix_len( old, - 
old_range.start..old_range.start + n - x, + old_range.start..old_range.start + old_len - old_idx, new, - new_range.start..new_range.start + m - y, + new_range.start..new_range.start + new_len - new_idx, ); - x += advance; - y += advance; + old_idx += advance; + new_idx += advance; } // This is the new best x value - vb[k] = x; + backward_endpoints[diagonal] = old_idx; - if !odd && (k - delta).abs() <= d { - // TODO optimise this so we don't have to compare against n - if vb[k] + vf[-(k - delta)] >= n { - // Return the snake - return Some((n - x + old_range.start, m - y + new_range.start)); - } + if !delta_is_odd + && (diagonal - delta).abs() <= edit_distance + && backward_endpoints[diagonal] + forward_endpoints[-(diagonal - delta)] >= old_len + { + return Some(( + old_len - old_idx + old_range.start, + new_len - new_idx + new_range.start, + )); } } - - // TODO: Maybe there's an opportunity to optimise and bail early? } None @@ -245,54 +297,72 @@ fn conquer( mut old_range: Range, new: &[Token], mut new_range: Range, - vf: &mut V, - vb: &mut V, + forward_endpoints: &mut FurthestEndpoints, + backward_endpoints: &mut FurthestEndpoints, result: &mut Vec>, ) where T: PartialEq + Clone + Debug, { // Check for common prefix - let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); - if common_prefix_len > 0 { + let prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); + if prefix_len > 0 { result.extend( - old[old_range.start..old_range.start + common_prefix_len] + old[old_range.start..old_range.start + prefix_len] .iter() .map(|token| RawOperation::Equal(vec![token.clone()])), ); } - old_range.start += common_prefix_len; - new_range.start += common_prefix_len; + old_range.start += prefix_len; + new_range.start += prefix_len; // Check for common suffix - let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); - let common_suffix = ( - old_range.end - common_suffix_len, - 
new_range.end - common_suffix_len, - ); - old_range.end -= common_suffix_len; - new_range.end -= common_suffix_len; + let suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); + let suffix_start = old_range.end - suffix_len; + old_range.end -= suffix_len; + new_range.end -= suffix_len; if old_range.is_empty() && new_range.is_empty() { // do nothing } else if new_range.is_empty() { result.extend( - old[old_range.start..old_range.start + old_range.len()] + old[old_range.start..old_range.end] .iter() .map(|token| RawOperation::Delete(vec![token.clone()])), ); } else if old_range.is_empty() { result.extend( - new[new_range.start..new_range.start + new_range.len()] + new[new_range.start..new_range.end] .iter() .map(|token| RawOperation::Insert(vec![token.clone()])), ); - } else if let Some((x_start, y_start)) = - find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb) - { - let (old_a, old_b) = split_at(old_range, x_start); - let (new_a, new_b) = split_at(new_range, y_start); - conquer(old, old_a, new, new_a, vf, vb, result); - conquer(old, old_b, new, new_b, vf, vb, result); + } else if let Some((split_old, split_new)) = find_middle_snake( + old, + old_range.clone(), + new, + new_range.clone(), + forward_endpoints, + backward_endpoints, + ) { + let (old_before, old_after) = split_at(old_range, split_old); + let (new_before, new_after) = split_at(new_range, split_new); + conquer( + old, + old_before, + new, + new_before, + forward_endpoints, + backward_endpoints, + result, + ); + conquer( + old, + old_after, + new, + new_after, + forward_endpoints, + backward_endpoints, + result, + ); } else { result.extend( old[old_range.start..old_range.end] @@ -306,9 +376,9 @@ fn conquer( ); } - if common_suffix_len > 0 { + if suffix_len > 0 { result.extend( - old[common_suffix.0..common_suffix.0 + common_suffix_len] + old[suffix_start..suffix_start + suffix_len] .iter() .map(|token| RawOperation::Equal(vec![token.clone()])), );