Add tokenizer selector

This commit is contained in:
Andras Schmelczer 2025-07-06 22:06:43 +01:00
parent 0ad3dee468
commit 56e08588ef
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
3 changed files with 310 additions and 123 deletions

View file

@ -28,31 +28,95 @@
<div class="scroll-container">
<div class="page-wrapper">
<header>
<h1>3-Way Text Merge</h1>
<h1>Reconcile: automated 3-way text merge</h1>
<p>
The
<a
href="https://github.com/schmelczer/reconcile"
target="_blank"
rel="noopener noreferrer"
>reconcile</a
>
library solves a fundamental challenge in collaborative editing: what happens
when multiple users edit the same text simultaneously but we can only capture
the end result, not the intermediary edits? Essentially, it's
<a
href="https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff3.html"
target="_blank"
rel="noopener noreferrer"
>diff3</a
>
(or <code>git merge</code>) but with automatic conflict resolution.
</p>
<p>
The
<a href="https://github.com/schmelczer/reconcile" target="_blank">reconcile</a>
solves a fundamental challenge in collaborative editing: what happens when
multiple people edit the same text simultaneously?
<code>reconcile(parent: str, left: str, right: str) -> str</code>
takes conflicting concurrent edits and intelligently merges them into a unified
result. Beyond basic conflict resolution, it offers sophisticated merging
heuristics, flexible tokenization options, and cursor position tracking.
takes conflicting concurrent edits and intelligently merges them into a
unified result. Beyond basic conflict resolution, it offers sophisticated
merging heuristics, flexible tokenization options, and cursor position
tracking.
</p>
<p>
The algorithm begins with your chosen tokenizer, then applies Myers' diff
algorithm to compare the original text with both conflicting versions. These
diffs undergo transformation to preserve meaningful change sequences, before a
final merge strategy—inspired by Operational Transformation (OT)—reconciles all
final merge strategy—inspired by Operational Transformation—reconciles all
conflicting modifications without losing any edits.
</p>
<p>
For more details, see the
<a href="https://github.com/schmelczer/reconcile" target="_blank" rel="noopener noreferrer">README</a>.
</p>
<p>
Use the tokenization options below to experiment with different strategies.
The library supports user-defined tokenizers as well.
</p>
</header>
<main>
<!-- Tokenizer picker: one radio group named "tokenizer" with the values
     "Character", "Word" and "Line"; "Word" is the option checked on load. -->
<section class="tokenizer-selector">
<div class="radio-group" role="radiogroup" aria-label="Tokenization strategy">
<label class="radio-option">
<input
type="radio"
name="tokenizer"
value="Character"
id="tokenizer-character"
/>
<span class="radio-custom" aria-hidden="true"></span>
<div class="radio-content">
<span class="radio-label">Character</span>
<span class="radio-description">Split by individual characters</span>
</div>
</label>
<label class="radio-option">
<input
type="radio"
name="tokenizer"
value="Word"
id="tokenizer-word"
checked
/>
<span class="radio-custom" aria-hidden="true"></span>
<div class="radio-content">
<span class="radio-label">Word</span>
<span class="radio-description">Split by words (default)</span>
</div>
</label>
<label class="radio-option">
<input type="radio" name="tokenizer" value="Line" id="tokenizer-line" />
<span class="radio-custom" aria-hidden="true"></span>
<div class="radio-content">
<span class="radio-label">Line</span>
<span class="radio-description"
>Split by lines similarly to <code>git merge</code></span
>
</div>
</label>
</div>
</section>
<div class="text-area-card diamond-parent">
<label
for="original"
@ -86,6 +150,7 @@
<div class="text-area-card diamond-result">
<label
for="merged"
title="Read-only. Change the above text boxes to change the content of this box."
>
Deconflicted result
@ -99,16 +164,17 @@
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
aria-hidden="true"
>
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
<path stroke="none" d="M0 0h24v24H0z" fill="none"></path>
<path
d="M10 10l-6 6v4h4l6 -6m1.99 -1.99l2.504 -2.504a2.828 2.828 0 1 0 -4 -4l-2.5 2.5"
/>
<path d="M13.5 6.5l4 4" />
<path d="M3 3l18 18" />
></path>
<path d="M13.5 6.5l4 4"></path>
<path d="M3 3l18 18"></path>
</svg>
</label>
<div id="merged"></div>
<div id="merged" role="textbox" aria-readonly="true" aria-live="polite"></div>
</div>
</main>
@ -140,7 +206,8 @@
</div>
</div>
<noscript>JavaScript is required for this website.</noscript>
<noscript>JavaScript is required for this website to function properly.</noscript>
<script inline inline-asset="index.js" inline-asset-delete></script>
</body>
</html>

View file

@ -1,52 +1,54 @@
import { init, reconcileWithHistory } from 'reconcile';
import type { Tokenizer } from 'reconcile';
import './style.scss';
// Page elements, looked up once when the module is evaluated. The `as` casts
// assume every id is present in the page; `getElementById` yields null otherwise.
const originalTextArea = document.getElementById('original') as HTMLTextAreaElement;
const leftTextArea = document.getElementById('left') as HTMLTextAreaElement;
const rightTextArea = document.getElementById('right') as HTMLTextAreaElement;
// The merged output is rendered into a <div>, not a <textarea>.
const mergedTextArea = document.getElementById('merged') as HTMLDivElement;
// Every radio input belonging to the "tokenizer" group.
const tokenizerRadios = document.querySelectorAll(
'input[name="tokenizer"]'
) as NodeListOf<HTMLInputElement>;
const sampleText = `The \`reconcile\` Rust library is embedded on this page a WASM module and it powers these text boxes. Experiment with the "Original", "First concurrent edit", and "Second concurrent edit" text boxes to watch competing changes merge in real-time within the "Deconflicted result" box. Here, you will see color-coded tokens marking the origin of each token, including ones that got deleted. The result highly depends on the tokenization strategy, for example, deciding how casing or white-spacing is taken into account.`;
const sampleText = `The \`reconcile\` Rust library is embedded on this page as a WASM module and powers these text boxes. Experiment with changing the "Original", "First concurrent edit", and "Second concurrent edit" text boxes to see competing changes get merged in real-time within the "Deconflicted result" box. Here, you will see color-coded tokens marking the origin of each token, including ones that got deleted. The result highly depends on the tokenization strategy, for example, deciding how casing or whitespace is taken into account.`;
/**
 * Entry point of the demo page.
 *
 * Awaits `init()` from the `reconcile` package, registers `updateMergedText`
 * on the three input text areas and on the tokenizer radios (plus
 * `resizeTextAreas` on window resize), then loads the sample text, renders the
 * first merge and calls `focusTextArea` on the left text area.
 */
async function main(): Promise<void> {
// Nothing below runs until the promise returned by `init()` has resolved.
await init();
// Any edit to one of the three inputs re-renders the merged output.
originalTextArea?.addEventListener('input', updateMergedText);
leftTextArea?.addEventListener('input', updateMergedText);
rightTextArea?.addEventListener('input', updateMergedText);
originalTextArea.addEventListener('input', updateMergedText);
leftTextArea.addEventListener('input', updateMergedText);
rightTextArea.addEventListener('input', updateMergedText);
window.addEventListener('resize', resizeTextAreas);
// Changing the selected tokenizer radio also re-renders the merged output.
tokenizerRadios.forEach((radio) => {
radio.addEventListener('change', updateMergedText);
});
loadSample();
updateMergedText();
if (leftTextArea) focusTextArea(leftTextArea);
focusTextArea(leftTextArea);
}
/**
 * Fills the three input text areas from `sampleText`: the original receives it
 * unchanged, the left edit swaps 'color' for 'colour' and appends an extra
 * sentence, and the right edit rewrites ', for example,' to ' such as' and
 * 'WASM' to 'WebAssembly'.
 */
function loadSample(): void {
if (originalTextArea) originalTextArea.value = sampleText;
if (leftTextArea) {
originalTextArea.value = sampleText;
// `replace` with a string pattern only touches the first occurrence.
leftTextArea.value =
sampleText.replace('color', 'colour') +
" Check out what's the most complex conflict you can come up with!";
}
if (rightTextArea) {
rightTextArea.value = sampleText
.replace(', for example,', ' such as')
.replace('WASM', 'WebAssembly');
}
}
function updateMergedText(): void {
resizeTextAreas();
if (!originalTextArea || !leftTextArea || !rightTextArea || !mergedTextArea) {
return;
}
const original = originalTextArea.value;
const left = leftTextArea.value;
const right = rightTextArea.value;
const results = reconcileWithHistory(original, left, right);
const selectedTokenizer = getSelectedTokenizer();
const results = reconcileWithHistory(original, left, right, selectedTokenizer);
mergedTextArea.innerHTML = '';
@ -58,11 +60,17 @@ function updateMergedText(): void {
}
}
/**
 * Returns the tokenizer chosen in the "tokenizer" radio group.
 *
 * Falls back to 'Word' (the option the markup marks as checked by default)
 * when no radio is checked, so callers never receive `undefined` disguised as
 * a `Tokenizer` by the type assertion.
 *
 * @returns The `value` of the checked radio, or 'Word' if none is checked.
 */
function getSelectedTokenizer(): Tokenizer {
  const selectedRadio = Array.from(tokenizerRadios).find((radio) => radio.checked);
  // The DOM exposes radio values as plain `string`; the assertion relies on the
  // `value` attributes in the markup matching the members of `Tokenizer`.
  return (selectedRadio?.value ?? 'Word') as Tokenizer;
}
/**
 * Calls `autoResize` on the three input text areas, but only when the browser
 * reports no support for the `field-sizing: content` CSS declaration.
 */
function resizeTextAreas(): void {
// Only auto-resize if field-sizing CSS property is not supported, like in Safari as of now
if (!CSS.supports('field-sizing', 'content')) {
if (originalTextArea) autoResize(originalTextArea);
if (leftTextArea) autoResize(leftTextArea);
if (rightTextArea) autoResize(rightTextArea);
autoResize(originalTextArea);
autoResize(leftTextArea);
autoResize(rightTextArea);
}
}

View file

@ -68,7 +68,7 @@ header > p:not(:first-of-type) {
main {
display: grid;
grid-template-rows: min-content;
grid-template-rows: min-content min-content min-content;
grid-template-columns: 1fr 1fr;
gap: 20px;
justify-items: center;
@ -76,23 +76,120 @@ main {
padding: 32px;
}
/* Tokenizer picker: spans every column of the `main` grid, in its first row. */
.tokenizer-selector {
grid-column: 1 / -1;
grid-row: 1;
width: 100%;
margin-bottom: 8px;
}
/* Lays the option cards out in a centred row that wraps when space runs out. */
.radio-group {
display: flex;
gap: 16px;
justify-content: center;
flex-wrap: wrap;
}
/* One clickable card per tokenizer option. */
.radio-option {
display: flex;
align-items: center;
gap: 12px;
padding: 16px 20px;
background: #fff;
border-radius: 12px;
box-shadow: 0 2px 8px rgba(36, 81, 166, 0.08);
cursor: pointer;
transition: all 0.2s ease;
/* Transparent border reserves the width that the checked state colours in. */
border: 2px solid transparent;
min-width: 180px;
/* Containing block for the absolutely positioned radio input. */
position: relative;
}
/* Hover feedback: stronger shadow and a 2px upward shift. */
.radio-option:hover {
box-shadow: 0 4px 16px rgba(36, 81, 166, 0.12);
transform: translateY(-2px);
}
/* Highlights the card whose radio input is currently checked. */
.radio-option:has(input:checked) {
background: #f0f7ff;
border-color: #2451a6;
box-shadow: 0 4px 16px rgba(36, 81, 166, 0.16);
}
/* Hide the native radio visually. It stays in the document, so the wrapping
   label still toggles it and it can still receive keyboard focus. */
.radio-option input[type='radio'] {
  position: absolute;
  opacity: 0;
  pointer-events: none;
}

/* The native control is invisible, so surface keyboard focus on the card
   itself; without this there is no visible focus indicator at all. */
.radio-option:has(input:focus-visible) {
  outline: 2px solid #2451a6;
  outline-offset: 2px;
}
/* 20px circle drawn in place of the visually hidden native radio. */
.radio-custom {
width: 20px;
height: 20px;
border: 2px solid #d1d5db;
border-radius: 50%;
position: relative;
transition: all 0.2s ease;
/* Keep the circle at full size when the text next to it needs the room. */
flex-shrink: 0;
}
/* Fills the circle with the accent colour when its option is checked. */
.radio-option:has(input:checked) .radio-custom {
border-color: #2451a6;
background: #2451a6;
}
/* Inner white dot, centred in the circle and scaled to zero by default. */
.radio-custom::after {
content: '';
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%) scale(0);
width: 8px;
height: 8px;
border-radius: 50%;
background: white;
transition: transform 0.2s ease;
}
/* Scales the inner dot to full size when the option is checked. */
.radio-option:has(input:checked) .radio-custom::after {
transform: translate(-50%, -50%) scale(1);
}
/* Stacks the option's label and description vertically. */
.radio-content {
display: flex;
flex-direction: column;
gap: 2px;
}
/* Option title. */
.radio-label {
font-weight: 600;
color: #2451a6;
font-size: 0.95rem;
}
/* Smaller, muted explanatory text shown with the option title. */
.radio-description {
font-size: 0.8rem;
color: #6b7280;
line-height: 1.2;
}
/* Card holding the original text: full grid width, second row. */
.diamond-parent {
grid-column: 1 / -1;
grid-row: 2;
}
.diamond-left {
grid-column: 1;
grid-row: 2;
grid-row: 3;
}
.diamond-right {
grid-column: 2;
grid-row: 2;
grid-row: 3;
}
.diamond-result {
grid-column: 1 / -1;
grid-row: 3;
grid-row: 4;
}
.diamond-result label {
@ -196,28 +293,43 @@ textarea {
@media (max-width: 768px) {
main {
grid-template-columns: 1fr;
grid-template-rows: auto auto auto auto;
grid-template-rows: auto auto auto auto auto;
}
.diamond-parent {
.tokenizer-selector {
grid-column: 1;
grid-row: 1;
}
.diamond-left {
.diamond-parent {
grid-column: 1;
grid-row: 2;
}
.diamond-right {
.diamond-left {
grid-column: 1;
grid-row: 3;
}
.diamond-result {
.diamond-right {
grid-column: 1;
grid-row: 4;
}
.radio-group {
flex-direction: column;
gap: 12px;
}
.radio-option {
min-width: unset;
width: 100%;
}
.diamond-result {
grid-column: 1;
grid-row: 5;
}
}
footer {