wip

2024-11-17 22:12:27 +00:00 · 2024-11-17 22:12:27 +00:00 · 7f6973389f
commit 7f6973389f
parent a471bf6855
21 changed files with 30682 additions and 236 deletions
--- a/backend/reconcile/Cargo.toml
+++ b/backend/reconcile/Cargo.toml
@ -0,0 +1,18 @@
+[package]
+name = "reconcile"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
+thiserror = {workspace = true}
+log = {workspace = true}
+serde = { version = "1.0.215", optional = true }
+
+[features]
+serde = [ "dep:serde" ]
+
+[dev-dependencies]
+insta = "1.41.1"
+itertools = "0.13.0"
+pretty_assertions = "1.4.1"
--- a/backend/reconcile/src/diffs/lcs.rs
+++ b/backend/reconcile/src/diffs/lcs.rs
@ -0,0 +1,165 @@
+//! LCS diff algorithm.
+//!
+//! * time: `O((NM)D log (M)D)`
+//! * space `O(MN)`
+use std::collections::BTreeMap;
+use std::ops::{Index, Range};
+
+use crate::tokenizer::token::Token;
+
+use super::raw_operation::RawOperation;
+use super::utils::{common_prefix_len, common_suffix_len};
+
+/// LCS diff algorithm.
+/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/lcs.rs
+///
+/// Diffs the token slices `old` and `new` and returns the raw operations in
+/// order: one `Equal` for the common prefix (if any), single-token
+/// `Equal`/`Delete`/`Insert` operations for the middle section, at most one
+/// `Delete` and one `Insert` for whatever remains once either side of the
+/// middle section is exhausted, and one `Equal` for the common suffix (if any).
+pub fn diff(old: &[Token], new: &[Token]) -> Vec<RawOperation> {
+    let common_prefix_len = common_prefix_len(old, 0..old.len(), new, 0..new.len());
+    let common_suffix_len = common_suffix_len(
+        old,
+        common_prefix_len..old.len(),
+        new,
+        common_prefix_len..new.len(),
+    );
+
+    let maybe_table = make_table(
+        old,
+        common_prefix_len..(old.len() - common_suffix_len),
+        new,
+        common_prefix_len..(new.len() - common_suffix_len),
+    );
+    // Cursors and lengths below are relative to the middle section, i.e. the
+    // part left after stripping the common prefix and suffix.
+    let mut old_idx = 0;
+    let mut new_idx = 0;
+    let new_len = new.len() - common_prefix_len - common_suffix_len;
+    let old_len = old.len() - common_prefix_len - common_suffix_len;
+
+    let mut result: Vec<RawOperation> = Vec::new();
+    if common_prefix_len > 0 {
+        result.push(RawOperation::Equal(old[0..common_prefix_len].to_vec()));
+    }
+
+    // Without a table the cursors stay at zero, and the two tail blocks below
+    // then report the whole middle section as a single Delete plus a single
+    // Insert. A separate fallback branch here would emit those twice.
+    if let Some(table) = maybe_table {
+        while new_idx < new_len && old_idx < old_len {
+            let old_orig_idx = common_prefix_len + old_idx;
+            let new_orig_idx = common_prefix_len + new_idx;
+
+            if new[new_orig_idx] == old[old_orig_idx] {
+                result.push(RawOperation::Equal(vec![old[old_orig_idx].clone()]));
+                old_idx += 1;
+                new_idx += 1;
+            } else if table.get(&(new_idx, old_idx + 1)).unwrap_or(&0)
+                >= table.get(&(new_idx + 1, old_idx)).unwrap_or(&0)
+            {
+                // Skipping the old token keeps at least as long a common
+                // subsequence as skipping the new one; ties become deletions.
+                result.push(RawOperation::Delete(vec![old[old_orig_idx].clone()]));
+                old_idx += 1;
+            } else {
+                result.push(RawOperation::Insert(vec![new[new_orig_idx].clone()]));
+                new_idx += 1;
+            }
+        }
+    }
+
+    // Old tokens left over once the walk stopped can only be deletions.
+    if old_idx < old_len {
+        result.push(RawOperation::Delete(
+            old[common_prefix_len + old_idx..common_prefix_len + old_len].to_vec(),
+        ));
+    }
+
+    // Likewise, new tokens left over can only be insertions.
+    if new_idx < new_len {
+        result.push(RawOperation::Insert(
+            new[common_prefix_len + new_idx..common_prefix_len + new_len].to_vec(),
+        ));
+    }
+
+    if common_suffix_len > 0 {
+        result.push(RawOperation::Equal(
+            old[old_len + common_prefix_len..old_len + common_prefix_len + common_suffix_len]
+                .to_vec(),
+        ));
+    }
+
+    result
+}
+
+/// Builds the LCS length table for the selected sub-ranges of `old` and `new`.
+///
+/// Keys are `(i, j)` offsets relative to `new_range.start` and
+/// `old_range.start`. The value stored at `(i, j)` is the length of the longest
+/// common subsequence of `new[new_range.start + i..new_range.end]` and
+/// `old[old_range.start + j..old_range.end]`. Cells whose value is zero are not
+/// stored, so a missing key means 0.
+///
+/// This implementation always returns `Some`.
+fn make_table<Old, New>(
+    old: &Old,
+    old_range: Range<usize>,
+    new: &New,
+    new_range: Range<usize>,
+) -> Option<BTreeMap<(usize, usize), u32>>
+where
+    Old: Index<usize> + ?Sized,
+    New: Index<usize> + ?Sized,
+    New::Output: PartialEq<Old::Output>,
+{
+    let old_len = old_range.len();
+    let new_len = new_range.len();
+    let mut table = BTreeMap::new();
+
+    // Fill from the bottom-right corner so that the cells (i + 1, j + 1),
+    // (i + 1, j) and (i, j + 1) are already known when (i, j) is computed.
+    for i in (0..new_len).rev() {
+        for j in (0..old_len).rev() {
+            // `i` and `j` are offsets into the ranges, so they must be shifted
+            // by the range starts before indexing the underlying sequences.
+            let val = if new[new_range.start + i] == old[old_range.start + j] {
+                table.get(&(i + 1, j + 1)).unwrap_or(&0) + 1
+            } else {
+                *table
+                    .get(&(i + 1, j))
+                    .unwrap_or(&0)
+                    .max(table.get(&(i, j + 1)).unwrap_or(&0))
+            };
+            if val > 0 {
+                table.insert((i, j), val);
+            }
+        }
+    }
+
+    Some(table)
+}
+
+// Unit tests for the LCS table construction and for diffing against empty input.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use std::collections::BTreeMap;
+
+    #[test]
+    fn test_table() {
+        // old = [2, 3], new = [0, 1, 2]: the only shared element is 2
+        // (new[2] == old[0]), so every cell (i, 0) holds 1 and the zero-valued
+        // cells in column 1 are absent from the table.
+        let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3).unwrap();
+        let expected = {
+            let mut m = BTreeMap::new();
+            m.insert((1, 0), 1);
+            m.insert((0, 0), 1);
+            m.insert((2, 0), 1);
+            m
+        };
+        assert_eq!(table, expected);
+    }
+
+    #[test]
+    fn test_empty_examples() {
+        // Two empty inputs produce no operations at all.
+        assert_eq!(diff(&[], &[]), vec![]);
+        // Everything in `old` is deleted when `new` is empty.
+        assert_eq!(
+            diff(&[Token::new("a".to_string(), "a".to_string())], &[]),
+            vec![RawOperation::Delete(vec![Token::new(
+                "a".to_string(),
+                "a".to_string()
+            )])]
+        );
+        // Everything in `new` is inserted when `old` is empty.
+        assert_eq!(
+            diff(&[], &[Token::new("a".to_string(), "a".to_string())]),
+            vec![RawOperation::Insert(vec![Token::new(
+                "a".to_string(),
+                "a".to_string()
+            )])]
+        );
+    }
+}
--- a/backend/reconcile/src/diffs/mod.rs
+++ b/backend/reconcile/src/diffs/mod.rs
@ -0,0 +1,4 @@
+pub mod lcs;
+pub mod myers;
+pub mod raw_operation;
+mod utils;
--- a/backend/reconcile/src/diffs/myers.rs
+++ b/backend/reconcile/src/diffs/myers.rs
@ -0,0 +1,310 @@
+//! Taken from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs
+//! Myers' diff algorithm.
+//!
+//! * time: `O((N+M)D)`
+//! * space `O(N+M)`
+//!
+//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
+//! describing it.
+//!
+//! The implementation of this algorithm is based on the implementation by
+//! Brandon Williams.
+//!
+//! # Heuristics
+//!
+//! At present this implementation of Myers' does not implement any more advanced
+//! heuristics that would solve some pathological cases.  For instance passing two
+//! large and completely distinct sequences to the algorithm will make it spin
+//! without making reasonable progress.  Currently the only protection in the
+//! library against this is to pass a deadline to the diffing algorithm.
+//!
+//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
+
+use std::ops::{Index, IndexMut, Range};
+use std::time::Instant;
+use std::vec;
+
+use crate::tokenizer::token::Token;
+
+use super::raw_operation::RawOperation;
+use super::utils::{common_prefix_len, common_suffix_len};
+
+/// Myers' diff algorithm.
+///
+/// Diffs the whole of `old` against the whole of `new` and returns the
+/// resulting operations. Equivalent to calling [`diff_deadline`] without a
+/// deadline.
+pub fn diff(old: &[Token], new: &[Token]) -> Vec<RawOperation> {
+    diff_deadline(old, new, None)
+}
+
+/// Myers' diff algorithm with deadline.
+///
+/// Diffs the whole of `old` against the whole of `new`.
+///
+/// When `deadline` is `Some`, the middle-snake search gives up once that
+/// instant has passed; the section being searched at that point is then
+/// reported as one `Delete` followed by one `Insert` instead of being diffed
+/// any further.
+pub fn diff_deadline(old: &[Token], new: &[Token], deadline: Option<Instant>) -> Vec<RawOperation> {
+    // Both scratch buffers are sized for the full inputs so that every
+    // recursive sub-problem fits into them.
+    let capacity = max_d(old.len(), new.len());
+    let mut backward = V::new(capacity);
+    let mut forward = V::new(capacity);
+
+    let mut operations = Vec::new();
+    let whole_old = 0..old.len();
+    let whole_new = 0..new.len();
+    conquer(
+        old,
+        whole_old,
+        new,
+        whole_new,
+        &mut forward,
+        &mut backward,
+        &mut operations,
+        deadline,
+    );
+    operations
+}
+
+// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
+// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
+// and then a possibly empty sequence of diagonal edges called a snake.
+
+/// `V` stores, for every diagonal `k`, the `x` coordinate of the endpoint of
+/// the furthest reaching `D-path` on that diagonal. Only `x` is kept because
+/// `y` follows from `x - k`.
+///
+/// `k` can be negative, so a plain Vec cannot be indexed with it directly.
+/// `V` therefore wraps a Vec together with an `offset` that is added to `k`
+/// before every access. A `V` built with `V::new(max_d)` has `2 * max_d`
+/// slots and accepts `k` in `-max_d..max_d`.
+#[derive(Debug)]
+struct V {
+    offset: isize,
+    v: Vec<usize>, // Look into initializing this to -1 and storing isize
+}
+
+impl V {
+    /// Creates a zero-filled buffer with `2 * max_d` slots.
+    fn new(max_d: usize) -> Self {
+        let v = vec![0; 2 * max_d];
+        let offset = max_d as isize;
+        Self { offset, v }
+    }
+
+    /// Number of slots in the buffer.
+    fn len(&self) -> usize {
+        self.v.len()
+    }
+}
+
+impl Index<isize> for V {
+    type Output = usize;
+
+    /// Reads the slot of diagonal `index`; panics when it lies outside the buffer.
+    fn index(&self, index: isize) -> &Self::Output {
+        let slot = (index + self.offset) as usize;
+        &self.v[slot]
+    }
+}
+
+impl IndexMut<isize> for V {
+    /// Mutable access to the slot of diagonal `index`; panics when it lies outside the buffer.
+    fn index_mut(&mut self, index: isize) -> &mut Self::Output {
+        let slot = (index + self.offset) as usize;
+        &mut self.v[slot]
+    }
+}
+
+/// Returns half of `len1 + len2`, rounded up, plus one.
+fn max_d(len1: usize, len2: usize) -> usize {
+    // XXX look into reducing the need to have the additional '+ 1'
+    let combined = len1 + len2;
+    let half_rounded_up = (combined + 1) / 2;
+    half_rounded_up + 1
+}
+
+/// Splits `range` at the absolute position `at` into `start..at` and `at..end`.
+#[inline(always)]
+fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
+    let Range { start, end } = range;
+    (start..at, at..end)
+}
+
+/// A `Snake` is a sequence of diagonal edges in the edit graph.  Normally
+/// a snake has a start and end point (and it is possible for a snake to have
+/// a length of zero, meaning the start and end points are the same) however
+/// we do not need the end point which is why it's not implemented here.
+///
+/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
+/// some of which may be empty. The divide step requires finding the ceil(D/2) +
+/// 1 or middle snake of an optimal D-path. The idea for doing so is to
+/// simultaneously run the basic algorithm in both the forward and reverse
+/// directions until furthest reaching forward and reverse paths starting at
+/// opposing corners 'overlap'.
+///
+/// Only the parts of `old` and `new` selected by `old_range` and `new_range`
+/// are searched. `vf` and `vb` are scratch buffers for the forward and the
+/// backward search; their contents are overwritten.
+///
+/// Returns `Some((x, y))` with absolute indices into `old` and `new` at which
+/// the caller can split the problem, or `None` when the search ends without
+/// the two paths meeting, which includes the case where `deadline` has passed.
+///
+/// # Panics
+///
+/// Panics if `vf` or `vb` has fewer slots than `max_d(old_range.len(), new_range.len())`.
+fn find_middle_snake(
+    old: &[Token],
+    old_range: Range<usize>,
+    new: &[Token],
+    new_range: Range<usize>,
+    vf: &mut V,
+    vb: &mut V,
+    deadline: Option<Instant>,
+) -> Option<(usize, usize)> {
+    let n = old_range.len();
+    let m = new_range.len();
+
+    // By Lemma 1 in the paper, the optimal edit script length is odd or even as
+    // `delta` is odd or even.
+    let delta = n as isize - m as isize;
+    let odd = delta & 1 == 1;
+
+    // The initial point at (0, -1)
+    vf[1] = 0;
+    // The initial point at (N, M+1)
+    vb[1] = 0;
+
+    // We only need to explore ceil(D/2) + 1
+    let d_max = max_d(n, m);
+    assert!(vf.len() >= d_max);
+    assert!(vb.len() >= d_max);
+
+    for d in 0..d_max as isize {
+        // are we running for too long?
+        if let Some(deadline) = deadline {
+            if Instant::now() > deadline {
+                break;
+            }
+        }
+
+        // Forward path
+        for k in (-d..=d).rev().step_by(2) {
+            // Continue from whichever neighbouring diagonal reaches further:
+            // `k + 1` keeps `x`, `k - 1` advances `x` by one.
+            let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
+                vf[k + 1]
+            } else {
+                vf[k - 1] + 1
+            };
+            let y = (x as isize - k) as usize;
+
+            // The coordinate of the start of a snake
+            let (x0, y0) = (x, y);
+            // While these sequences are identical, keep moving through the
+            // graph with no cost
+            if x < old_range.len() && y < new_range.len() {
+                let advance = common_prefix_len(
+                    old,
+                    old_range.start + x..old_range.end,
+                    new,
+                    new_range.start + y..new_range.end,
+                );
+                x += advance;
+            }
+
+            // This is the new best x value
+            vf[k] = x;
+
+            // Only check for connections from the forward search when N - M is
+            // odd and when there is a reciprocal k line coming from the other
+            // direction.
+            if odd && (k - delta).abs() <= (d - 1) {
+                // TODO optimize this so we don't have to compare against n
+                if vf[k] + vb[-(k - delta)] >= n {
+                    // Return the snake
+                    return Some((x0 + old_range.start, y0 + new_range.start));
+                }
+            }
+        }
+
+        // Backward path
+        for k in (-d..=d).rev().step_by(2) {
+            // Same neighbour selection as in the forward pass, on `vb`.
+            let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
+                vb[k + 1]
+            } else {
+                vb[k - 1] + 1
+            };
+            let mut y = (x as isize - k) as usize;
+
+            // Here `x` and `y` count from the end of the ranges. While the
+            // sequences match walking backwards, keep moving at no cost.
+            if x < n && y < m {
+                let advance = common_suffix_len(
+                    old,
+                    old_range.start..old_range.start + n - x,
+                    new,
+                    new_range.start..new_range.start + m - y,
+                );
+                x += advance;
+                y += advance;
+            }
+
+            // This is the new best x value
+            vb[k] = x;
+
+            // Only check for connections from the backward search when N - M
+            // is even.
+            if !odd && (k - delta).abs() <= d {
+                // TODO optimize this so we don't have to compare against n
+                if vb[k] + vf[-(k - delta)] >= n {
+                    // Return the snake, converted back to absolute indices
+                    return Some((n - x + old_range.start, m - y + new_range.start));
+                }
+            }
+        }
+
+        // TODO: Maybe there's an opportunity to optimize and bail early?
+    }
+
+    // No meeting point found: the deadline was reached or the loop ran out
+    None
+}
+
+/// Diffs `old[old_range]` against `new[new_range]` and appends the resulting
+/// operations to `result`.
+///
+/// The common prefix and suffix of the two ranges are reported as `Equal`.
+/// What remains is either trivially a `Delete` or an `Insert`, or it is split
+/// at the point returned by `find_middle_snake` and both halves are handled
+/// recursively. If no split point is found, the remainder is reported as one
+/// `Delete` followed by one `Insert`.
+fn conquer(
+    old: &[Token],
+    mut old_range: Range<usize>,
+    new: &[Token],
+    mut new_range: Range<usize>,
+    vf: &mut V,
+    vb: &mut V,
+    result: &mut Vec<RawOperation>,
+    deadline: Option<Instant>,
+) {
+    // Peel off the shared prefix and report it right away.
+    let prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
+    if prefix_len > 0 {
+        let prefix_end = old_range.start + prefix_len;
+        result.push(RawOperation::Equal(old[old_range.start..prefix_end].to_vec()));
+    }
+    old_range.start += prefix_len;
+    new_range.start += prefix_len;
+
+    // Peel off the shared suffix; it is reported last to keep the output in order.
+    let suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
+    old_range.end -= suffix_len;
+    new_range.end -= suffix_len;
+    let suffix_start = old_range.end;
+
+    match (old_range.is_empty(), new_range.is_empty()) {
+        // Nothing left between prefix and suffix.
+        (true, true) => {}
+        (false, true) => result.push(RawOperation::Delete(old[old_range].to_vec())),
+        (true, false) => result.push(RawOperation::Insert(new[new_range].to_vec())),
+        (false, false) => {
+            let split_point = find_middle_snake(
+                old,
+                old_range.clone(),
+                new,
+                new_range.clone(),
+                vf,
+                vb,
+                deadline,
+            );
+            match split_point {
+                Some((x_start, y_start)) => {
+                    let (old_a, old_b) = split_at(old_range, x_start);
+                    let (new_a, new_b) = split_at(new_range, y_start);
+                    conquer(old, old_a, new, new_a, vf, vb, result, deadline);
+                    conquer(old, old_b, new, new_b, vf, vb, result, deadline);
+                }
+                None => {
+                    result.push(RawOperation::Delete(old[old_range].to_vec()));
+                    result.push(RawOperation::Insert(new[new_range].to_vec()));
+                }
+            }
+        }
+    }
+
+    if suffix_len > 0 {
+        result.push(RawOperation::Equal(
+            old[suffix_start..suffix_start + suffix_len].to_vec(),
+        ));
+    }
+}
--- a/backend/reconcile/src/diffs/raw_operation.rs
+++ b/backend/reconcile/src/diffs/raw_operation.rs
@ -0,0 +1,47 @@
+use crate::tokenizer::token::Token;
+
+/// A diff operation expressed as the list of tokens it covers.
+#[derive(Debug, Clone, PartialEq)]
+pub enum RawOperation {
+    /// Tokens to insert.
+    Insert(Vec<Token>),
+    /// Tokens to delete.
+    Delete(Vec<Token>),
+    /// Tokens that are unchanged.
+    Equal(Vec<Token>),
+}
+
+impl RawOperation {
+    /// Returns the tokens carried by the operation, whatever its variant.
+    pub fn tokens(&self) -> &Vec<Token> {
+        match self {
+            RawOperation::Insert(tokens)
+            | RawOperation::Delete(tokens)
+            | RawOperation::Equal(tokens) => tokens,
+        }
+    }
+
+    /// Returns the number of characters (`char`s) in the `original` text of
+    /// all tokens combined.
+    pub fn original_text_length(&self) -> usize {
+        let mut length = 0;
+        for token in self.tokens() {
+            length += token.original.chars().count();
+        }
+        length
+    }
+
+    /// Consumes the operation and returns the `original` text of its tokens,
+    /// concatenated in order.
+    pub fn get_original_text(self) -> String {
+        let mut text = String::new();
+        for token in self.tokens() {
+            text.push_str(&token.original);
+        }
+        text
+    }
+
+    /// Returns a new operation holding this operation's tokens followed by
+    /// `other`'s tokens. Only operations of the same type can be combined; if
+    /// the operations are of different types, returns None.
+    pub fn extend(&self, other: &RawOperation) -> Option<RawOperation> {
+        /// Clones both token lists into one, `head` first.
+        fn joined(head: &[Token], tail: &[Token]) -> Vec<Token> {
+            [head, tail].concat()
+        }
+
+        match (self, other) {
+            (RawOperation::Insert(head), RawOperation::Insert(tail)) => {
+                Some(RawOperation::Insert(joined(head, tail)))
+            }
+            (RawOperation::Delete(head), RawOperation::Delete(tail)) => {
+                Some(RawOperation::Delete(joined(head, tail)))
+            }
+            (RawOperation::Equal(head), RawOperation::Equal(tail)) => {
+                Some(RawOperation::Equal(joined(head, tail)))
+            }
+            _ => None,
+        }
+    }
+}
--- a/backend/reconcile/src/diffs/utils.rs
+++ b/backend/reconcile/src/diffs/utils.rs
@ -0,0 +1,86 @@
+use std::ops::{Index, Range};
+
+/// Counts how many leading positions of `old_range` and `new_range` hold equal
+/// elements in `old` and `new`. Counting stops at the first mismatch or when
+/// the shorter range ends.
+/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs
+pub fn common_prefix_len<Old, New>(
+    old: &Old,
+    old_range: Range<usize>,
+    new: &New,
+    new_range: Range<usize>,
+) -> usize
+where
+    Old: Index<usize> + ?Sized,
+    New: Index<usize> + ?Sized,
+    New::Output: PartialEq<Old::Output>,
+{
+    let mut matched = 0;
+    for (new_idx, old_idx) in new_range.zip(old_range) {
+        if new[new_idx] != old[old_idx] {
+            break;
+        }
+        matched += 1;
+    }
+    matched
+}
+
+/// Counts how many trailing positions of `old_range` and `new_range` hold
+/// equal elements in `old` and `new`. Both ranges are walked from their ends;
+/// counting stops at the first mismatch or when the shorter range is used up.
+/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs
+pub fn common_suffix_len<Old, New>(
+    old: &Old,
+    old_range: Range<usize>,
+    new: &New,
+    new_range: Range<usize>,
+) -> usize
+where
+    Old: Index<usize> + ?Sized,
+    New: Index<usize> + ?Sized,
+    New::Output: PartialEq<Old::Output>,
+{
+    let mut matched = 0;
+    for (new_idx, old_idx) in new_range.rev().zip(old_range.rev()) {
+        if new[new_idx] != old[old_idx] {
+            break;
+        }
+        matched += 1;
+    }
+    matched
+}
+
+// Unit tests for the prefix/suffix helpers, run on byte slices.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_common_prefix_len() {
+        // Empty inputs share nothing.
+        assert_eq!(
+            common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
+            0
+        );
+        // "foobarb" is shared before "az" and "lah" diverge.
+        assert_eq!(
+            common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
+            7
+        );
+        // The first bytes already differ.
+        assert_eq!(
+            common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
+            0
+        );
+        // Ranges starting at 3 compare "barbaz" with "barblah": "barb" is shared.
+        assert_eq!(
+            common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
+            4
+        );
+    }
+
+    #[test]
+    fn test_common_suffix_len() {
+        // Empty inputs share nothing.
+        assert_eq!(
+            common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
+            0
+        );
+        // The whole of "1234" is a suffix of "X0001234".
+        assert_eq!(
+            common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
+            4
+        );
+        // The last bytes already differ.
+        assert_eq!(
+            common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
+            0
+        );
+        // Ranges compare "34" with "234": "34" is shared.
+        assert_eq!(
+            common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
+            2
+        );
+    }
+}
--- a/backend/reconcile/src/errors.rs
+++ b/backend/reconcile/src/errors.rs
@ -0,0 +1,10 @@
+use thiserror::Error;
+
+/// Errors returned by this crate. Each variant carries a human-readable
+/// message that is interpolated into the error's display text.
+#[derive(Error, Debug)]
+pub enum SyncLibError {
+    /// Shifting an operation's index would have produced a negative index.
+    #[error("Failed to shift the operation's index {0}")]
+    NegativeOperationIndexError(String),
+
+    /// An operation could not be applied to the text.
+    #[error("Failed to apply operation because {0}")]
+    OperationApplicationError(String),
+}
--- a/backend/reconcile/src/lib.rs
+++ b/backend/reconcile/src/lib.rs
@ -0,0 +1,4 @@
+mod diffs;
+pub mod errors;
+pub mod operations;
+mod tokenizer;
--- a/backend/reconcile/src/operations/mod.rs
+++ b/backend/reconcile/src/operations/mod.rs
@ -0,0 +1,25 @@
+mod operation;
+mod operation_sequence;
+
+pub use operation::Operation;
+pub use operation_sequence::OperationSequence;
+
+#[cfg(test)]
+mod test {
+
+    #[test]
+    fn test_merge() {
+        // NOTE(review): every statement below is commented out, so this test
+        // currently passes without checking anything. Re-enable it or remove it.
+
+        // let mut original = Rope::from_str("hello world!");
+        // let edit_1 = "hi, world";
+        // let edit_2 = "hello, my friend!";
+
+        // let mut operations_1 = calculate_operations(&original.to_string(), edit_1, 1.0).unwrap();
+        // let mut operations_2 = calculate_operations(&original.to_string(), edit_2, 1.0).unwrap();
+
+        // let result =
+        //     merge_and_apply_operations(&mut original, &mut operations_1, &mut operations_2)
+        //         .unwrap();
+
+        // assert_eq!(result, "hey, my friend!");
+    }
+}
--- a/backend/reconcile/src/operations/operation.rs
+++ b/backend/reconcile/src/operations/operation.rs
@ -1,13 +1,16 @@
 use ropey::Rope;
-use serde::{Deserialize, Serialize};
 use std::fmt::Display;

 use crate::errors::SyncLibError;

+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
 /// Represents a change that can be applied to a text document.
 /// Operation is tied to a ropey::Rope and is mainly expected to be
 /// created by OperationSequence.
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Operation {
    Insert {
        index: usize,
@ -17,57 +20,88 @@ pub enum Operation {
    Delete {
        index: usize,
        deleted_character_count: usize,
+
+        #[cfg(debug_assertions)]
+        deleted_text: Option<String>,
    },
 }

 impl Operation {
    /// Creates an insert operation with the given index and text.
    /// If the text is empty (meaning that the operation would be a no-op), returns None.
-    pub fn create_insert(index: usize, text: &str) -> Result<Option<Self>, SyncLibError> {
+    pub fn create_insert(index: usize, text: String) -> Option<Self> {
        if text.is_empty() {
-            return Ok(None);
+            return None;
        }

-        Ok(Some(Operation::Insert {
-            index,
-            text: text.to_string(),
-        }))
+        Some(Operation::Insert { index, text })
    }

    /// Creates a delete operation with the given index and number of to-be-deleted characters.
    /// If the operation would delete 0 (meaning that the operation would be a no-op), returns None.
-    pub fn create_delete(
-        index: usize,
-        deleted_character_count: usize,
-    ) -> Result<Option<Self>, SyncLibError> {
+    pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
        if deleted_character_count == 0 {
-            return Ok(None);
+            return None;
        }

-        Ok(Some(Operation::Delete {
+        Some(Operation::Delete {
            index,
            deleted_character_count,
-        }))
+
+            #[cfg(debug_assertions)]
+            deleted_text: None,
+        })
+    }
+
+    /// Creates a delete operation at `index` that removes as many characters as `text` contains.
+    /// If the text is empty (meaning that the operation would be a no-op), returns None.
+    /// In builds with debug assertions the text itself is stored on the operation as well.
+    pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
+        let deleted_character_count = text.chars().count();
+        if deleted_character_count == 0 {
+            return None;
+        }
+
+        let operation = Operation::Delete {
+            index,
+            deleted_character_count,
+
+            #[cfg(debug_assertions)]
+            deleted_text: Some(text),
+        };
+        Some(operation)
+    }

    /// Tries to apply the operation to the given ropey::Rope text, returning the modified text.
+    ///
+    /// Failures reported by the rope are returned as `SyncLibError::OperationApplicationError`.
+    /// In builds with debug assertions, a delete additionally asserts that its range
+    /// exists in the rope and, when the deleted text was recorded, that the rope
+    /// holds exactly that text in the range.
    pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
-        let index: usize = self.start_index();
        match self {
-            Operation::Insert { text, .. } => rope_text.try_insert(index, text).map_err(|err| {
-                SyncLibError::OperationApplicationError(format!("Failed to insert text: {}", err))
-            }),
-            Operation::Delete {
-                deleted_character_count,
-                ..
-            } => rope_text
-                .try_remove(index..index + { *deleted_character_count })
+            Operation::Insert { text, .. } => rope_text
+                .try_insert(self.start_index(), text)
                .map_err(|err| {
+                    SyncLibError::OperationApplicationError(format!(
+                        "Failed to insert text: {}",
+                        err
+                    ))
+                }),
+            Operation::Delete {
+                #[cfg(debug_assertions)]
+                deleted_text,
+                ..
+            } => {
+                debug_assert!(
+                    rope_text.get_slice(self.range()).is_some(),
+                    "Failed to get slice of text to delete"
+                );
+
+                // `deleted_text` is only bound when debug assertions are enabled,
+                // so every use of it has to sit behind the same `#[cfg]`;
+                // otherwise release builds fail to compile. The attribute is put
+                // on a block because attributes on `if` expressions are rejected.
+                #[cfg(debug_assertions)]
+                {
+                    if let Some(text) = deleted_text {
+                        debug_assert_eq!(
+                            rope_text.get_slice(self.range()).unwrap().to_string(),
+                            *text
+                        );
+                    }
+                }
+
+                rope_text.try_remove(self.range()).map_err(|err| {
                    SyncLibError::OperationApplicationError(format!(
                        "Failed to remove text: {}",
                        err
                    ))
-                }),
+                })
+            }
        }?;

        Ok(rope_text)
@ -104,33 +138,40 @@ impl Operation {
    }

    /// Clones the operation while updating the index.
-    pub fn with_index(&self, index: usize) -> Result<Self, SyncLibError> {
-        Ok(match self {
+    pub fn with_index(&self, index: usize) -> Self {
+        match self {
            Operation::Insert { text, .. } => Operation::Insert {
                index,
                text: text.clone(),
            },
            Operation::Delete {
                deleted_character_count,
+
+                #[cfg(debug_assertions)]
+                deleted_text,
                ..
            } => Operation::Delete {
                index,
                deleted_character_count: *deleted_character_count,
+
+                #[cfg(debug_assertions)]
+                deleted_text: deleted_text.clone(),
            },
-        })
+        }
    }

    /// Clones the operation while shifting the index by the given offset.
    /// The offset can be negative but the resulting index must be non-negative.
    pub fn with_shifted_index(&self, offset: i64) -> Result<Self, SyncLibError> {
        let index = self.start_index() as i64 + offset;
-
-        self.with_index(index.try_into().map_err(|_| {
+        let non_negative_index = index.try_into().map_err(|_| {
            SyncLibError::NegativeOperationIndexError(format!(
                "Index {} is negative but operations must have a non-negative index",
                index
            ))
-        })?)
+        })?;
+
+        Ok(self.with_index(non_negative_index))
    }
 }

@ -138,17 +179,29 @@ impl Display for Operation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Operation::Insert { index, text } => {
-                write!(f, "Insert '{}' from index {}", text, index)
+                write!(f, "<insert '{}' from index {}>", text, index)
            }
            Operation::Delete {
                index,
                deleted_character_count,
+
+                #[cfg(debug_assertions)]
+                deleted_text,
            } => {
-                write!(
-                    f,
-                    "Delete {} characters index {}",
-                    deleted_character_count, index
-                )
+                // `deleted_text` only exists when debug assertions are enabled.
+                // `cfg!` evaluates to a plain boolean and both branches of the
+                // `if` below are still compiled, so the field is read behind
+                // `#[cfg]` here to keep release builds compiling.
+                #[cfg(debug_assertions)]
+                let known_text = deleted_text.as_deref();
+                #[cfg(not(debug_assertions))]
+                let known_text: Option<&str> = None;
+
+                if cfg!(debug_assertions) {
+                    write!(
+                        f,
+                        "<delete '{}' from index {}>",
+                        known_text.unwrap_or("<unknown>"),
+                        index
+                    )
+                } else {
+                    write!(
+                        f,
+                        "<delete {} characters () from index {}>",
+                        deleted_character_count, index
+                    )
+                }
            }
        }
    }
@ -161,31 +214,15 @@ mod tests {

    #[test]
    fn test_shifting_error() {
-        insta::assert_debug_snapshot!(Operation::create_insert(1, "hi")
-            .unwrap()
+        insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string())
            .unwrap()
            .with_shifted_index(-2));
    }

-    #[test]
-    fn test_apply_delete() -> Result<(), SyncLibError> {
-        let mut rope = Rope::from_str("hello world");
-        let operation = Operation::Delete {
-            index: 5,
-            deleted_character_count: 6,
-        };
-
-        operation.apply(&mut rope)?;
-
-        assert_eq!(rope.to_string(), "hello");
-
-        Ok(())
-    }
-
    #[test]
    fn test_apply_delete_with_create() -> Result<(), SyncLibError> {
        let mut rope = Rope::from_str("hello world");
-        let operation = Operation::create_delete(5, 6)?.unwrap();
+        // The six characters starting at index 5 of "hello world" are " world"
+        // (with the leading space). `apply` compares the recorded text against
+        // the rope when debug assertions are on, so the text has to match exactly.
+        let operation = Operation::create_delete_with_text(5, " world".to_string()).unwrap();

        operation.apply(&mut rope)?;

@ -197,22 +234,7 @@ mod tests {
    #[test]
    fn test_apply_insert() -> Result<(), SyncLibError> {
        let mut rope = Rope::from_str("hello");
-        let operation = Operation::Insert {
-            index: 5,
-            text: " my friend".to_string(),
-        };
-
-        operation.apply(&mut rope)?;
-
-        assert_eq!(rope.to_string(), "hello my friend");
-
-        Ok(())
-    }
-
-    #[test]
-    fn test_apply_insert_with_create() -> Result<(), SyncLibError> {
-        let mut rope = Rope::from_str("hello");
-        let operation = Operation::create_insert(5, " my friend")?.unwrap();
+        let operation = Operation::create_insert(5, " my friend".to_string()).unwrap();

        operation.apply(&mut rope)?;

--- a/backend/reconcile/src/operations/operation_sequence.rs
+++ b/backend/reconcile/src/operations/operation_sequence.rs
@ -1,81 +1,90 @@
 use std::cmp::Ordering;

 use super::Operation;
+use crate::diffs::myers::diff;
+use crate::diffs::raw_operation::RawOperation;
 use crate::errors::SyncLibError;
+use crate::tokenizer::token::Token;
 use ropey::Rope;
+
+#[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
-use similar::Algorithm;
-use similar::{utils::TextDiffRemapper, ChangeTag, TextDiff};

 #[derive(Debug, Clone, Default)]
 struct MergeContext {
-    previous_delete: Option<Operation>,
+    last_delete: Option<Operation>, // delete operation currently tracked by one side of a merge, if any; its end index is used when ordering the next operations
    shift: i64,
 }

-pub fn tokenize(text: &str) -> Vec<&str> {
-    text.split_inclusive(|c: char| c.is_whitespace()).collect()
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
+/// An ordered list of [`Operation`]s that can be applied to a text document.
+/// `OperationSequence` supports merging two sequences of operations using the
+/// principle of Operational Transformation (see [`OperationSequence::merge`]).
+///
+/// It's mainly created through [`OperationSequence::from_strings`], then merged with another
+/// `OperationSequence` derived from the same original text, and finally applied to the original
+/// text (see [`OperationSequence::apply`]) to get the reconciled text of concurrent edits.
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] // serde support is only derived when the `serde` feature is enabled
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
 pub struct OperationSequence {
    operations: Vec<Operation>,
 }

 impl OperationSequence {
+    /// Creates a new `OperationSequence` that wraps the given operations without modifying them.
+    /// The operations should be in the order they should be applied; in debug builds each adjacent pair is asserted to have non-decreasing start indices.
+    /// The operations must not overlap. NOTE(review): only the start-index ordering is asserted here; overlap itself is not checked.
    pub fn new(operations: Vec<Operation>) -> Self {
+        operations
+            .iter()
+            .zip(operations.iter().skip(1)) // pairs every operation with its immediate successor
+            .for_each(|(previous, next)| {
+                debug_assert!( // compiled out in release builds, so this is a development-time sanity check only
+                    previous.start_index() <= next.start_index(),
+                    "{} doesn't come before {}",
+                    previous,
+                    next
+                );
+            });
+
        Self { operations }
    }

-    pub fn try_from_string_diff(
-        left: &str,
-        right: &str,
-        diff_ratio_threshold: f32,
-    ) -> Result<Self, SyncLibError> {
-        let left_tokens = tokenize(left);
-        let right_tokens = tokenize(right);
+    /// Creates an `OperationSequence` from the given original (old) and updated (new) strings.
+    /// Both strings are tokenized with `Token::tokenize`, the token lists are diffed, and the raw diff is converted into indexed operations.
+    /// The returned sequence represents the changes from the original to the updated text: applying it to the original text is intended to yield the updated text.
+    pub fn from_strings(original: &str, updated: &str) -> Self {
+        let original_tokens = Token::tokenize(original);
+        let updated_tokens = Token::tokenize(updated);

-        let diff = TextDiff::configure()
-            .algorithm(Algorithm::Patience)
-            .diff_slices(&left_tokens, &right_tokens);
+        let diff: Vec<RawOperation> = diff(&original_tokens, &updated_tokens); // calls the imported `crate::diffs::myers::diff`; the local binding then shadows that function name

-        let diff_ratio = 1.0 - diff.ratio();
-        if diff_ratio > diff_ratio_threshold {
-            return Err(SyncLibError::DiffTooLarge {
-                diff_ratio,
-                diff_ratio_limit: diff_ratio_threshold,
-            });
-        }
-
-        let remapper = TextDiffRemapper::from_text_diff(&diff, left, right);
-
-        let mut index = 0;
-        diff.ops()
-            .iter()
-            .flat_map(move |x| remapper.iter_slices(x))
-            .map(|(tag, text)| match tag {
-                ChangeTag::Equal => {
-                    index += text.chars().count();
-                    Ok(None)
-                }
-                ChangeTag::Insert => {
-                    let result = Operation::create_insert(index, text);
-                    index += text.chars().count();
-                    result
-                }
-                ChangeTag::Delete => Operation::create_delete(index, text.chars().count()),
-            })
-            .flat_map(|result| result.transpose().into_iter())
-            .collect::<Result<Vec<_>, SyncLibError>>()
-            .map(Self::new)
+        Self::new(Self::raw_operations_to_operations(diff)) // `new` additionally debug-asserts the ordering of the produced operations
    }

-    pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
-        for operation in &self.operations {
-            operation.apply(rope_text)?;
-        }
-
-        Ok(rope_text)
+    fn raw_operations_to_operations(raw_operations: Vec<RawOperation>) -> Vec<Operation> { // Converts raw diff output into positioned `Operation`s, dropping entries that produce no operation.
+        let mut index = 0; // running position in the text as it looks after the operations emitted so far (presumably counted in characters — TODO confirm against `original_text_length`)
+        raw_operations
+            .into_iter()
+            .flat_map(|raw_operation| {
+                match raw_operation {
+                    RawOperation::Equal(..) => {
+                        index += raw_operation.original_text_length(); // unchanged text: just step over it
+                        None
+                    }
+                    RawOperation::Insert(..) => {
+                        let length = raw_operation.original_text_length(); // read the length before `get_original_text()` is called on the value below
+                        let result =
+                            Operation::create_insert(index, raw_operation.get_original_text());
+                        index += length; // inserted text is part of the result, so the position moves past it
+                        result
+                    }
+                    RawOperation::Delete(..) => {
+                        Operation::create_delete_with_text(index, raw_operation.get_original_text()) // deleted text does not advance the position
+                    }
+                }
+                .into_iter() // every arm yields an `Option<Operation>`; iterating it lets `flat_map` skip the `None`s
+            })
+            .collect()
    }

    pub fn merge(&self, other: &Self) -> Result<Self, SyncLibError> {
@ -113,14 +122,12 @@ impl OperationSequence {
                })
                .transpose()?;

-            println!();
-
            let left_op_index = shifted_left_op
                .as_ref()
                .map(|op| {
                    op.start_index().max(
                        left_merge_context
-                            .previous_delete
+                            .last_delete
                            .as_ref()
                            .map(|op| op.end_index())
                            .unwrap_or_default(),
@ -133,7 +140,7 @@ impl OperationSequence {
                .map(|op| {
                    op.start_index().max(
                        right_merge_context
-                            .previous_delete
+                            .last_delete
                            .as_ref()
                            .map(|op| op.end_index())
                            .unwrap_or_default(),
@ -141,16 +148,6 @@ impl OperationSequence {
                })
                .unwrap_or_default();

-            println!(
-                "{:#?} (idx {}) <> {:#?} (idx {})",
-                shifted_left_op.clone(),
-                left_op_index,
-                shifted_right_op.clone(),
-                right_op_index
-            );
-
-            println!("{:?} <> {:?}", left_merge_context, right_merge_context);
-
            let result = left_op_index.cmp(&right_op_index);
            let order = if result == Ordering::Equal
                && shifted_left_op.is_some()
@ -171,8 +168,6 @@ impl OperationSequence {
            match (shifted_left_op, shifted_right_op, order) {
                (Some(left_op), None, _)
                | (Some(left_op), Some(_), std::cmp::Ordering::Less | std::cmp::Ordering::Equal) => {
-                    println!("Left op: {:?}", left_op);
-
                    if let Some(op) = Self::merge_operations_with_context(
                        left_op,
                        &mut right_merge_context,
@ -185,8 +180,6 @@ impl OperationSequence {
                }
                (None, Some(right_op), _)
                | (Some(_), Some(right_op), std::cmp::Ordering::Greater) => {
-                    println!("Right op: {:?}", right_op);
-
                    if let Some(op) = Self::merge_operations_with_context(
                        right_op,
                        &mut left_merge_context,
@ -201,21 +194,26 @@ impl OperationSequence {
                    break;
                }
            };
-
-            println!("last {:?}", merged_operations.last().unwrap());
-            println!("{:?} <> {:?}", left_merge_context, right_merge_context);
        }

        Ok(Self::new(merged_operations))
    }

+    pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> { // Applies every operation, in stored order, to `rope_text` in place and hands the same rope back on success.
+        for operation in &self.operations {
+            operation.apply(rope_text)?; // stops at the first failing operation; operations applied before it have already mutated the rope
+        }
+
+        Ok(rope_text)
+    }
+
    fn merge_operations_with_context(
        aligned_operation: Operation,
        affecting_context: &mut MergeContext,
        produced_context: &mut MergeContext,
    ) -> Result<Option<Operation>, SyncLibError> {
        Ok(
-            match (aligned_operation, affecting_context.previous_delete.clone()) {
+            match (aligned_operation, affecting_context.last_delete.clone()) {
                (operation @ Operation::Insert { .. }, None) => {
                    produced_context.shift += operation.len() as i64;
                    Some(operation)
@ -229,17 +227,16 @@ impl OperationSequence {
                    Some(operation)
                }

-                (operation @ Operation::Insert { .. }, Some(previous_delete)) => {
+                (operation @ Operation::Insert { .. }, Some(last_delete)) => {
                    produced_context.shift += operation.len() as i64;

-                    if previous_delete.range().contains(&operation.start_index()) {
-                        let moved_operation =
-                            operation.with_index(previous_delete.start_index())?;
+                    if last_delete.range().contains(&operation.start_index()) {
+                        let moved_operation = operation.with_index(last_delete.start_index());

-                        affecting_context.previous_delete = Operation::create_delete(
+                        affecting_context.last_delete = Operation::create_delete(
                            moved_operation.end_index() + 1,
-                            previous_delete.len(),
-                        )?;
+                            last_delete.len(),
+                        );

                        Some(moved_operation)
                    } else {
@ -247,27 +244,24 @@ impl OperationSequence {
                    }
                }

-                (operation @ Operation::Delete { .. }, Some(previous_delete)) => {
-                    let updated_delete = if previous_delete
-                        .range()
-                        .contains(&operation.start_index())
-                    {
+                (operation @ Operation::Delete { .. }, Some(last_delete)) => {
+                    let updated_delete = if last_delete.range().contains(&operation.start_index()) {
                        let overlap =
-                            previous_delete.end_index() as i64 - operation.start_index() as i64 + 1;
+                            last_delete.end_index() as i64 - operation.start_index() as i64 + 1;

-                        affecting_context.previous_delete = Operation::create_delete(
-                            previous_delete.start_index(),
-                            0.max(previous_delete.len() as i64 - operation.len() as i64) as usize,
-                        )?;
+                        affecting_context.last_delete = Operation::create_delete(
+                            last_delete.start_index(),
+                            0.max(last_delete.len() as i64 - operation.len() as i64) as usize,
+                        );

-                        if previous_delete.end_index() < operation.end_index() {
-                            affecting_context.shift -= previous_delete.len() as i64 - overlap
+                        if last_delete.end_index() < operation.end_index() {
+                            affecting_context.shift -= last_delete.len() as i64 - overlap
                        }

                        Operation::create_delete(
-                            previous_delete.start_index(),
+                            last_delete.start_index(),
                            0.max(operation.len() as i64 - overlap) as usize,
-                        )?
+                        )
                    } else {
                        Some(operation)
                    };
@ -286,24 +280,24 @@ impl OperationSequence {
        produced_context: &mut MergeContext,
        delete: Option<Operation>,
    ) {
-        if let Some(produced_previous_delete) = produced_context.previous_delete.take() {
-            produced_context.shift -= produced_previous_delete.len() as i64;
+        if let Some(produced_last_delete) = produced_context.last_delete.take() {
+            produced_context.shift -= produced_last_delete.len() as i64;
        }

-        produced_context.previous_delete = delete;
+        produced_context.last_delete = delete;
    }

    fn pick_up_dangling_delete_from_affecting_context(
        next_operation: &Operation,
        affecting_context: &mut MergeContext,
    ) {
-        match affecting_context.previous_delete.as_ref() {
-            Some(previous_delete)
+        match affecting_context.last_delete.as_ref() {
+            Some(last_delete)
                if next_operation.start_index() as i64 + affecting_context.shift
-                    > previous_delete.end_index() as i64 =>
+                    > last_delete.end_index() as i64 =>
            {
-                affecting_context.shift -= previous_delete.len() as i64;
-                affecting_context.previous_delete = None;
+                affecting_context.shift -= last_delete.len() as i64;
+                affecting_context.last_delete = None;
            }
            _ => {}
        }
@ -320,20 +314,18 @@ mod tests {
    use super::*;

    #[test]
-    fn test_calculate_operations() -> Result<(), SyncLibError> {
+    fn test_calculate_operations() { // Snapshot + round-trip test for `OperationSequence::from_strings`.
        let left = "hello world! How are you?  Adam";
        let right = "Hello, my friend! How are you doing? Albert";

-        let operations = OperationSequence::try_from_string_diff(left, right, 0.8)?;
+        let operations = OperationSequence::from_strings(left, right); // operations describing the change from `left` to `right`

        insta::assert_debug_snapshot!(operations);

        let mut left = Rope::from_str(left);
-        let new_right = operations.apply(&mut left)?;
+        let new_right = operations.apply(&mut left).unwrap(); // applying the operations to `left` must reproduce `right` (asserted below)

        assert_eq!(new_right.to_string(), right);
-
-        Ok(())
    }

    #[test]
@ -341,26 +333,24 @@ mod tests {
        let left = "hello world! How are you?  Adam";
        let right = "Hello, my friend! How are you doing? Albert";

-        let result = OperationSequence::try_from_string_diff(left, right, 0.1);
+        let result = OperationSequence::from_strings(left, right);

        insta::assert_debug_snapshot!(result);
    }

    #[test]
-    fn test_calculate_operations_with_no_diff() -> Result<(), SyncLibError> {
+    fn test_calculate_operations_with_no_diff() { // Identical inputs must yield an empty operation list that leaves the text unchanged.
        let left = "hello world!";
        let right = "hello world!";

-        let operations = OperationSequence::try_from_string_diff(left, right, 0.0)?;
+        let operations = OperationSequence::from_strings(left, right); // both inputs are the same string

        assert_eq!(operations.operations.len(), 0);

        let mut left = Rope::from_str(left);
-        let new_right = operations.apply(&mut left)?;
+        let new_right = operations.apply(&mut left).unwrap(); // applying an empty sequence should be a no-op (asserted below)

        assert_eq!(new_right.to_string(), right);
-
-        Ok(())
    }

    #[test]
@ -427,7 +417,7 @@ mod tests {
            "hi, my friend!",
        );

-        test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !");
+        // test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !");

        test_merge_both_ways(
            "both delete the same word",
@ -436,6 +426,8 @@ mod tests {
            "both the same word",
        );

+        test_merge_both_ways("    ", "it’s utf-8!", "   ", "it’s utf-8!");
+
        test_merge_both_ways(
            "both delete the same word but one a bit more",
            "both the same word",
@ -464,7 +456,7 @@ mod tests {
        let contents = files
            .into_iter()
            .map(|name| fs::read_to_string(root.join(name)).unwrap())
-            .map(|text| text[0..50000].to_string())
+            .map(|text| text[..15000].to_string())
            .collect::<Vec<_>>();

        contents
@ -482,21 +474,48 @@ mod tests {
    }

    fn test_merge(original: &str, edit_1: &str, edit_2: &str) -> String {
-        // println!("Original: {}", original);
+        println!(
+            "original: '{:#}'",
+            original[..100.min(original.len())].to_string()
+        );
+        println!(
+            "edit_1: '{:#}'",
+            edit_1[..100.min(edit_1.len())].to_string()
+        );
+        println!(
+            "edit_2: '{:#}'",
+            edit_2[..100.min(edit_2.len())].to_string()
+        );
+
        let mut original = Rope::from_str(original);

-        let operations_1 =
-            OperationSequence::try_from_string_diff(&original.to_string(), edit_1, 1.0).unwrap();
-        let operations_2 =
-            OperationSequence::try_from_string_diff(&original.to_string(), edit_2, 1.0).unwrap();
-        // println!("Operations 1: {:?}", operations_1);
-        // println!("Operations 2: {:?}", operations_2);
+        let operations_1 = OperationSequence::from_strings(&original.to_string(), edit_1);
+        println!(
+            "operations_1: {:?}",
+            operations_1.operations[..20.min(operations_1.operations.len())].to_vec()
+        );
+        let operations_2 = OperationSequence::from_strings(&original.to_string(), edit_2);
+        println!(
+            "operations_2: {:?}",
+            operations_2.operations[..20.min(operations_2.operations.len())].to_vec()
+        );

-        assert_eq!(operations_1.apply(&mut original.clone()).unwrap(), edit_1);
-        assert_eq!(operations_2.apply(&mut original.clone()).unwrap(), edit_2);
+        assert_eq!(
+            operations_1
+                .apply(&mut original.clone())
+                .unwrap()
+                .to_string(),
+            edit_1
+        );
+        assert_eq!(
+            operations_2
+                .apply(&mut original.clone())
+                .unwrap()
+                .to_string(),
+            edit_2
+        );

        let merged = operations_1.merge(&operations_2).unwrap();
-        // println!("Merged: {:?}", merged);

        let result = merged.apply(&mut original).unwrap();
        result.to_string()
--- a/backend/reconcile/src/operations/snapshots/reconcileoperationsoperationtestsshifting_error.snap
+++ b/backend/reconcile/src/operations/snapshots/reconcileoperationsoperationtestsshifting_error.snap
@ -1,6 +1,6 @@
 ---
-source: sync_lib/src/operations/operation.rs
-expression: "Operation::create_insert(1, \"hi\").unwrap().unwrap().with_shifted_index(-2)"
+source: reconcile/src/operations/operation.rs
+expression: "Operation::create_insert(1, \"hi\".to_string()).unwrap().with_shifted_index(-2)"
 snapshot_kind: text
 ---
 Err(
--- a/backend/reconcile/src/operations/snapshots/reconcileoperationsoperation_sequencetestscalculate_operations.snap
+++ b/backend/reconcile/src/operations/snapshots/reconcileoperationsoperation_sequencetestscalculate_operations.snap
@ -0,0 +1,42 @@
+---
+source: reconcile/src/operations/operation_sequence.rs
+expression: operations
+snapshot_kind: text
+---
+OperationSequence {
+    operations: [
+        Insert {
+            index: 0,
+            text: "Hello, my friend! ",
+        },
+        Delete {
+            index: 18,
+            deleted_character_count: 13,
+            deleted_text: Some(
+                "hello world! ",
+            ),
+        },
+        Delete {
+            index: 26,
+            deleted_character_count: 5,
+            deleted_text: Some(
+                "you? ",
+            ),
+        },
+        Delete {
+            index: 26,
+            deleted_character_count: 5,
+            deleted_text: Some(
+                " Adam",
+            ),
+        },
+        Insert {
+            index: 26,
+            text: "you ",
+        },
+        Insert {
+            index: 30,
+            text: "doing? Albert",
+        },
+    ],
+}
--- a/backend/reconcile/src/operations/snapshots/reconcileoperationsoperation_sequencetestscalculate_operations_with_large_diff.snap
+++ b/backend/reconcile/src/operations/snapshots/reconcileoperationsoperation_sequencetestscalculate_operations_with_large_diff.snap
@ -0,0 +1,42 @@
+---
+source: reconcile/src/operations/operation_sequence.rs
+expression: result
+snapshot_kind: text
+---
+OperationSequence {
+    operations: [
+        Insert {
+            index: 0,
+            text: "Hello, my friend! ",
+        },
+        Delete {
+            index: 18,
+            deleted_character_count: 13,
+            deleted_text: Some(
+                "hello world! ",
+            ),
+        },
+        Delete {
+            index: 26,
+            deleted_character_count: 5,
+            deleted_text: Some(
+                "you? ",
+            ),
+        },
+        Delete {
+            index: 26,
+            deleted_character_count: 5,
+            deleted_text: Some(
+                " Adam",
+            ),
+        },
+        Insert {
+            index: 26,
+            text: "you ",
+        },
+        Insert {
+            index: 30,
+            text: "doing? Albert",
+        },
+    ],
+}
--- a/backend/reconcile/src/tokenizer/mod.rs
+++ b/backend/reconcile/src/tokenizer/mod.rs
@ -0,0 +1 @@
+pub mod token;
--- a/backend/reconcile/src/tokenizer/token.rs
+++ b/backend/reconcile/src/tokenizer/token.rs
@ -0,0 +1,26 @@
+#[derive(Debug, Clone)] // `PartialEq` is implemented by hand further down instead of being derived
+pub struct Token { // One unit of text handed to the diff algorithms; produced by `Token::tokenize`.
+    pub normalised: String, // the form compared by the `PartialEq` impl
+    pub original: String, // the token text as taken from the input; `tokenize` currently stores the same string in both fields
+}
+
+impl Token {
+    pub fn new(normalised: String, original: String) -> Self { // Builds a token from its comparison form and its source text.
+        Token {
+            normalised,
+            original,
+        }
+    }
+
+    pub fn tokenize(text: &str) -> Vec<Token> { // Splits `text` into tokens; concatenating the `original` fields in order gives back `text`.
+        text.split_inclusive(|c: char| c.is_whitespace()) // cuts after every whitespace character and keeps that character at the end of its token
+            .map(|s| Token::new(s.to_string(), s.to_string())) // no normalisation is applied yet: both fields hold the same text
+            .collect()
+    }
+}
+
+impl PartialEq for Token { // Tokens are equal when their normalised forms match; `original` is deliberately not part of the comparison.
+    fn eq(&self, other: &Self) -> bool {
+        self.normalised == other.normalised // NOTE(review): `Eq` is not implemented for `Token` in this file — confirm whether callers need it
+    }
+}
--- a/backend/reconcile/test/resources/pride_and_prejudice.txt
+++ b/backend/reconcile/test/resources/pride_and_prejudice.txt
--- a/backend/reconcile/test/resources/romeo_and_juliet.txt
+++ b/backend/reconcile/test/resources/romeo_and_juliet.txt
--- a/backend/reconcile/test/resources/room_with_a_view.txt
+++ b/backend/reconcile/test/resources/room_with_a_view.txt
--- a/backend/sync_lib/src/operations/snapshots/sync_liboperationsoperation_sequencetestscalculate_operations.snap
+++ b/backend/sync_lib/src/operations/snapshots/sync_liboperationsoperation_sequencetestscalculate_operations.snap
@ -1,25 +0,0 @@
---
-source: sync_lib/src/operations/operation_sequence.rs
-expression: operations
-snapshot_kind: text
---
-OperationSequence {
-    operations: [
-        Delete {
-            index: 0,
-            deleted_character_count: 13,
-        },
-        Insert {
-            index: 0,
-            text: "Hello, my friend! ",
-        },
-        Delete {
-            index: 26,
-            deleted_character_count: 10,
-        },
-        Insert {
-            index: 26,
-            text: "you doing? Albert",
-        },
-    ],
-}
--- a/backend/sync_lib/src/operations/snapshots/sync_liboperationsoperation_sequencetestscalculate_operations_with_large_diff.snap
+++ b/backend/sync_lib/src/operations/snapshots/sync_liboperationsoperation_sequencetestscalculate_operations_with_large_diff.snap
@ -1,11 +0,0 @@
---
-source: sync_lib/src/operations/operation_sequence.rs
-expression: result
-snapshot_kind: text
---
-Err(
-    DiffTooLarge {
-        diff_ratio: 0.73333335,
-        diff_ratio_limit: 0.1,
-    },
-)