Merge crates
This commit is contained in:
parent
82e77eec89
commit
bcbac03228
60 changed files with 73 additions and 248 deletions
2
src/diffs.rs
Normal file
2
src/diffs.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod myers;
|
||||
pub mod raw_operation;
|
||||
357
src/diffs/myers.rs
Normal file
357
src/diffs/myers.rs
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
//! Taken from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs>
|
||||
//!
|
||||
//! Myers' diff algorithm.
|
||||
//!
|
||||
//! * time: `O((N+M)D)`
|
||||
//! * space `O(N+M)`
|
||||
//!
|
||||
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
|
||||
//! describing it.
|
||||
//!
|
||||
//! The implementation of this algorithm is based on the implementation by
|
||||
//! Brandon Williams.
|
||||
//!
|
||||
//! # Heuristics
|
||||
//!
|
||||
//! At present this implementation of Myers' does not implement any more
|
||||
//! advanced heuristics that would solve some pathological cases. For instance
|
||||
//! passing two large and completely distinct sequences to the algorithm will
|
||||
//! make it spin without making reasonable progress.
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
|
||||
use std::{
|
||||
ops::{Index, IndexMut, Range},
|
||||
vec,
|
||||
};
|
||||
|
||||
use super::raw_operation::RawOperation;
|
||||
use crate::{
|
||||
tokenizer::token::Token,
|
||||
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
|
||||
};
|
||||
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices
|
||||
/// `new_range`.
|
||||
///
|
||||
/// The returned `RawOperations` all have a token count of 1.
|
||||
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let max_d = (old.len() + new.len()).div_ceil(2) + 1;
|
||||
let mut vb = V::new(max_d);
|
||||
let mut vf = V::new(max_d);
|
||||
let mut result: Vec<RawOperation<T>> = vec![];
|
||||
|
||||
conquer(
|
||||
old,
|
||||
0..old.len(),
|
||||
new,
|
||||
0..new.len(),
|
||||
&mut vf,
|
||||
&mut vb,
|
||||
&mut result,
|
||||
);
|
||||
|
||||
debug_assert!(
|
||||
result.iter().all(|op| op.tokens().len() == 1),
|
||||
"All operations should be of length 1"
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
|
||||
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
|
||||
// and then a possibly empty sequence of diagonal edges called a snake.
|
||||
|
||||
/// `V` contains the endpoints of the furthest reaching `D-paths`. For each
|
||||
/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x`
|
||||
/// because `y` can be computed from `x - k`. In other words, `V` is an array of
|
||||
/// integers where `V[k]` contains the row index of the endpoint of the furthest
|
||||
/// reaching path in diagonal `k`.
|
||||
///
|
||||
/// We can't use a traditional Vec to represent `V` since we use `k` as an index
|
||||
/// and it can take on negative values. So instead `V` is represented as a
|
||||
/// light-weight wrapper around a Vec plus an `offset` which is the maximum
|
||||
/// value `k` can take on in order to map negative `k`'s back to a value >= 0.
|
||||
#[derive(Debug)]
|
||||
struct V {
|
||||
offset: isize,
|
||||
v: Vec<usize>, // Look into initializing this to -1 and storing isize
|
||||
}
|
||||
|
||||
impl V {
|
||||
fn new(max_d: usize) -> Self {
|
||||
Self {
|
||||
offset: max_d as isize,
|
||||
v: vec![0; 2 * max_d],
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize { self.v.len() }
|
||||
}
|
||||
|
||||
impl Index<isize> for V {
|
||||
type Output = usize;
|
||||
|
||||
fn index(&self, index: isize) -> &Self::Output { &self.v[(index + self.offset) as usize] }
|
||||
}
|
||||
|
||||
impl IndexMut<isize> for V {
|
||||
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
|
||||
&mut self.v[(index + self.offset) as usize]
|
||||
}
|
||||
}
|
||||
|
||||
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
|
||||
(range.start..at, at..range.end)
|
||||
}
|
||||
|
||||
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
|
||||
/// a snake has a start end end point (and it is possible for a snake to have
|
||||
/// a length of zero, meaning the start and end points are the same) however
|
||||
/// we do not need the end point which is why it's not implemented here.
|
||||
///
|
||||
/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
|
||||
/// some of which may be empty. The divide step requires finding the ceil(D/2) +
|
||||
/// 1 or middle snake of an optimal D-path. The idea for doing so is to
|
||||
/// simultaneously run the basic algorithm in both the forward and reverse
|
||||
/// directions until furthest reaching forward and reverse paths starting at
|
||||
/// opposing corners 'overlap'.
|
||||
fn find_middle_snake<T>(
|
||||
old: &[Token<T>],
|
||||
old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
) -> Option<(usize, usize)>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||
// `delta` is odd or even.
|
||||
let delta = n as isize - m as isize;
|
||||
let odd = delta & 1 == 1;
|
||||
|
||||
// The initial point at (0, -1)
|
||||
vf[1] = 0;
|
||||
// The initial point at (N, M+1)
|
||||
vb[1] = 0;
|
||||
|
||||
let d_max = (n + m).div_ceil(2) + 1;
|
||||
assert!(vf.len() >= d_max);
|
||||
assert!(vb.len() >= d_max);
|
||||
|
||||
for d in 0..d_max as isize {
|
||||
// Forward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||
vf[k + 1]
|
||||
} else {
|
||||
vf[k - 1] + 1
|
||||
};
|
||||
let y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
let (x0, y0) = (x, y);
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if x < old_range.len() && y < new_range.len() {
|
||||
let advance = common_prefix_len(
|
||||
old,
|
||||
old_range.start + x..old_range.end,
|
||||
new,
|
||||
new_range.start + y..new_range.end,
|
||||
);
|
||||
x += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vf[k] = x;
|
||||
|
||||
// Only check for connections from the forward search when N - M is
|
||||
// odd and when there is a reciprocal k line coming from the other
|
||||
// direction.
|
||||
if odd && (k - delta).abs() <= (d - 1) {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vf[k] + vb[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((x0 + old_range.start, y0 + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
|
||||
vb[k + 1]
|
||||
} else {
|
||||
vb[k - 1] + 1
|
||||
};
|
||||
let mut y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
if x < n && y < m {
|
||||
let advance = common_suffix_len(
|
||||
old,
|
||||
old_range.start..old_range.start + n - x,
|
||||
new,
|
||||
new_range.start..new_range.start + m - y,
|
||||
);
|
||||
x += advance;
|
||||
y += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vb[k] = x;
|
||||
|
||||
if !odd && (k - delta).abs() <= d {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vb[k] + vf[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((n - x + old_range.start, m - y + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe there's an opportunity to optimize and bail early?
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn conquer<T>(
|
||||
old: &[Token<T>],
|
||||
mut old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
result: &mut Vec<RawOperation<T>>,
|
||||
) where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + common_prefix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
old_range.start += common_prefix_len;
|
||||
new_range.start += common_prefix_len;
|
||||
|
||||
// Check for common suffix
|
||||
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let common_suffix = (
|
||||
old_range.end - common_suffix_len,
|
||||
new_range.end - common_suffix_len,
|
||||
);
|
||||
old_range.end -= common_suffix_len;
|
||||
new_range.end -= common_suffix_len;
|
||||
|
||||
if old_range.is_empty() && new_range.is_empty() {
|
||||
// do nothing
|
||||
} else if new_range.is_empty() {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + old_range.len()]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Delete(vec![token.clone()])),
|
||||
);
|
||||
} else if old_range.is_empty() {
|
||||
result.extend(
|
||||
new[new_range.start..new_range.start + new_range.len()]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Insert(vec![token.clone()])),
|
||||
);
|
||||
} else if let Some((x_start, y_start)) =
|
||||
find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb)
|
||||
{
|
||||
let (old_a, old_b) = split_at(old_range, x_start);
|
||||
let (new_a, new_b) = split_at(new_range, y_start);
|
||||
conquer(old, old_a, new, new_a, vf, vb, result);
|
||||
conquer(old, old_b, new, new_b, vf, vb, result);
|
||||
} else {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Delete(vec![token.clone()])),
|
||||
);
|
||||
result.extend(
|
||||
new[new_range.start..new_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Insert(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
result.extend(
|
||||
old[common_suffix.0..common_suffix.0 + common_suffix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_empty_diff() {
|
||||
let old: Vec<Token<String>> = vec![];
|
||||
let new: Vec<Token<String>> = vec![];
|
||||
let result = diff(&old, &new);
|
||||
assert_eq!(result.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_identical_content() {
|
||||
let content = vec!["a".into(), "b".into(), "c".into()];
|
||||
let result = diff(&content, &content);
|
||||
assert_debug_snapshot!(result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_only() {
|
||||
let old: Vec<Token<String>> = vec![];
|
||||
let new: Vec<Token<String>> = vec!["a".into(), "b".into()];
|
||||
let result = diff(&old, &new);
|
||||
assert_debug_snapshot!(result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_only() {
|
||||
let old = vec!["a".into(), "b".into()];
|
||||
let new: Vec<Token<String>> = vec![];
|
||||
let result = diff(&old, &new);
|
||||
assert_debug_snapshot!(result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_prefix_and_suffix() {
|
||||
let old = vec!["a".into(), "b".into(), "c".into(), "d".into()];
|
||||
let new = vec!["a".into(), "x".into(), "d".into()];
|
||||
let result = diff(&old, &new);
|
||||
assert_debug_snapshot!(result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_complex_diff() {
|
||||
let old = vec!["a".into(), "b".into(), "c".into(), "d".into()];
|
||||
let new = vec!["a".into(), "x".into(), "c".into(), "y".into()];
|
||||
let result = diff(&old, &new);
|
||||
assert_debug_snapshot!(result);
|
||||
}
|
||||
}
|
||||
64
src/diffs/raw_operation.rs
Normal file
64
src/diffs/raw_operation.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
use crate::tokenizer::token::Token;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
Insert(Vec<Token<T>>),
|
||||
Delete(Vec<Token<T>>),
|
||||
Equal(Vec<Token<T>>),
|
||||
}
|
||||
|
||||
impl<T> RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn tokens(&self) -> &Vec<Token<T>> {
|
||||
match self {
|
||||
RawOperation::Insert(tokens)
|
||||
| RawOperation::Delete(tokens)
|
||||
| RawOperation::Equal(tokens) => tokens,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn original_text_length(&self) -> usize {
|
||||
self.tokens().iter().map(Token::get_original_length).sum()
|
||||
}
|
||||
|
||||
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
|
||||
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
first_token.is_none_or(|token| token.is_left_joinable)
|
||||
}
|
||||
|
||||
pub fn is_right_joinable(&self) -> bool {
|
||||
let last_token = self.tokens().last();
|
||||
last_token.is_none_or(|token| token.is_right_joinable)
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation. Only operations of the
|
||||
/// same type as self can be used to extend self, otherwise the function
|
||||
/// will panic.
|
||||
pub fn extend(self, other: RawOperation<T>) -> RawOperation<T> {
|
||||
debug_assert!(
|
||||
std::mem::discriminant(&self) == std::mem::discriminant(&other),
|
||||
"Cannot extend operations of different types. This should have been handled before \
|
||||
calling this function."
|
||||
);
|
||||
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(self_tokens), RawOperation::Insert(other_tokens)) => {
|
||||
RawOperation::Insert(self_tokens.into_iter().chain(other_tokens).collect())
|
||||
}
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
|
||||
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
|
||||
}
|
||||
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => {
|
||||
RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect())
|
||||
}
|
||||
_ => unreachable!("Only operations of the same type can be extended"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "y",
|
||||
original: "y",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
13
src/lib.rs
Normal file
13
src/lib.rs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
mod diffs;
|
||||
mod operation_transformation;
|
||||
mod tokenizer;
|
||||
mod utils;
|
||||
|
||||
pub use operation_transformation::{
|
||||
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
|
||||
reconcile_with_tokenizer,
|
||||
};
|
||||
pub use tokenizer::{Tokenizer, token::Token, word_tokenizer::word_tokenizer};
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
pub mod wasm;
|
||||
167
src/operation_transformation.rs
Normal file
167
src/operation_transformation.rs
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
mod cursor;
|
||||
mod edited_text;
|
||||
mod merge_context;
|
||||
mod operation;
|
||||
mod ordered_operation;
|
||||
mod utils;
|
||||
|
||||
pub use cursor::{CursorPosition, TextWithCursors};
|
||||
pub use edited_text::EditedText;
|
||||
pub use operation::Operation;
|
||||
|
||||
use crate::Tokenizer;
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
|
||||
reconcile_with_cursors(original, left.into(), right.into())
|
||||
.text
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile_with_cursors<'a>(
|
||||
original: &'a str,
|
||||
left: TextWithCursors<'a>,
|
||||
right: TextWithCursors<'a>,
|
||||
) -> TextWithCursors<'static> {
|
||||
let left_operations = EditedText::from_strings(original, left);
|
||||
let right_operations = EditedText::from_strings(original, right);
|
||||
|
||||
let merged_operations = left_operations.merge(right_operations);
|
||||
|
||||
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile_with_tokenizer<'a, F, T>(
|
||||
original: &str,
|
||||
left: TextWithCursors<'a>,
|
||||
right: TextWithCursors<'a>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> TextWithCursors<'static>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
|
||||
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
|
||||
|
||||
let merged_operations = left_operations.merge(right_operations);
|
||||
|
||||
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::{fs, ops::Range, path::Path};
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
use test_case::test_matrix;
|
||||
|
||||
use super::*;
|
||||
use crate::CursorPosition;
|
||||
|
||||
#[test]
|
||||
fn test_cursor_complex() {
|
||||
let original = "this is some complex text to test cursor positions";
|
||||
let left = TextWithCursors::new(
|
||||
"this is really complex text for testing cursor positions",
|
||||
vec![
|
||||
CursorPosition {
|
||||
id: 0,
|
||||
char_index: 8,
|
||||
}, // after "this is "
|
||||
CursorPosition {
|
||||
id: 1,
|
||||
char_index: 22,
|
||||
}, // after "this is really complex text"
|
||||
],
|
||||
);
|
||||
let right = TextWithCursors::new(
|
||||
"that was some complex sample to test cursor movements",
|
||||
vec![
|
||||
CursorPosition {
|
||||
id: 2,
|
||||
char_index: 5,
|
||||
}, // after "that "
|
||||
CursorPosition {
|
||||
id: 3,
|
||||
char_index: 29,
|
||||
}, // after "some complex sample "
|
||||
],
|
||||
);
|
||||
|
||||
let merged = reconcile_with_cursors(original, left, right);
|
||||
|
||||
assert_eq!(
|
||||
merged,
|
||||
TextWithCursors::new(
|
||||
"that was really complex sample for testing cursor movements",
|
||||
vec![
|
||||
CursorPosition {
|
||||
id: 2,
|
||||
char_index: 5
|
||||
}, // unchanged
|
||||
CursorPosition {
|
||||
id: 0,
|
||||
char_index: 9
|
||||
}, // before "really"
|
||||
CursorPosition {
|
||||
id: 1,
|
||||
char_index: 23
|
||||
}, // inside of "s|ample" because "text" got replaced by "sample"
|
||||
CursorPosition {
|
||||
id: 3,
|
||||
char_index: 43
|
||||
}, // before "cursor movements"
|
||||
]
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[ignore = "expensive to run, only run in CI"]
|
||||
#[test_matrix( [
|
||||
"pride_and_prejudice.txt",
|
||||
"room_with_a_view.txt",
|
||||
"kun_lu.txt",
|
||||
"blns.txt"
|
||||
], [
|
||||
"pride_and_prejudice.txt",
|
||||
"room_with_a_view.txt",
|
||||
"kun_lu.txt",
|
||||
"blns.txt"
|
||||
], [
|
||||
"pride_and_prejudice.txt",
|
||||
"room_with_a_view.txt",
|
||||
"kun_lu.txt",
|
||||
"blns.txt"
|
||||
], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])]
|
||||
fn test_merge_files_without_panic(
|
||||
file_name_1: &str,
|
||||
file_name_2: &str,
|
||||
file_name_3: &str,
|
||||
range_1: Range<usize>,
|
||||
range_2: Range<usize>,
|
||||
range_3: Range<usize>,
|
||||
) {
|
||||
let files = [file_name_1, file_name_2, file_name_3];
|
||||
let permutations = [range_1, range_2, range_3];
|
||||
|
||||
let root = Path::new("tests/resources/");
|
||||
|
||||
let contents = files
|
||||
.iter()
|
||||
.zip(permutations.iter())
|
||||
.map(|(file, range)| {
|
||||
let path = root.join(file);
|
||||
fs::read_to_string(&path)
|
||||
.unwrap()
|
||||
.chars()
|
||||
.skip(range.start)
|
||||
.take(range.end)
|
||||
.collect::<String>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let _ = reconcile(&contents[0], &contents[1], &contents[2]);
|
||||
}
|
||||
}
|
||||
57
src/operation_transformation/cursor.rs
Normal file
57
src/operation_transformation/cursor.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
// CursorPosition represents the position of an identifiable cursor in a text
|
||||
// document based on its (UTF-8) character index.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct CursorPosition {
|
||||
pub id: usize,
|
||||
pub char_index: usize,
|
||||
}
|
||||
|
||||
impl CursorPosition {
|
||||
#[must_use]
|
||||
pub fn with_index(&self, index: usize) -> Self {
|
||||
CursorPosition {
|
||||
id: self.id,
|
||||
char_index: index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct TextWithCursors<'a> {
|
||||
pub text: Cow<'a, str>,
|
||||
pub cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl<'a> TextWithCursors<'a> {
|
||||
#[must_use]
|
||||
pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for TextWithCursors<'a> {
|
||||
fn from(text: &'a str) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
277
src/operation_transformation/edited_text.rs
Normal file
277
src/operation_transformation/edited_text.rs
Normal file
|
|
@ -0,0 +1,277 @@
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
|
||||
use crate::{
|
||||
diffs::{myers::diff, raw_operation::RawOperation},
|
||||
operation_transformation::{
|
||||
merge_context::MergeContext,
|
||||
utils::{cook_operations::cook_operations, elongate_operations::elongate_operations},
|
||||
},
|
||||
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
|
||||
utils::{side::Side, string_builder::StringBuilder},
|
||||
};
|
||||
|
||||
/// A text document and a sequence of operations that can be applied to the text
|
||||
/// document. `EditedText` supports merging two sequences of operations using
|
||||
/// the principles of Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the `from_strings` method, then merged with
|
||||
/// another `EditedText` derived from the same original text and then applied to
|
||||
/// the original text to get the reconciled text of concurrent edits.
|
||||
///
|
||||
/// In addition to text and operations, it also keeps track of cursor positions
|
||||
/// in the original text. The cursor positions are updated when the operations
|
||||
/// are applied, so that the cursor positions can be used to restore the
|
||||
/// cursor positions in the updated text.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
text: &'a str,
|
||||
operations: Vec<OrderedOperation<T>>,
|
||||
pub(crate) cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl<'a> EditedText<'a, String> {
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The default
|
||||
/// word tokenizer is used to tokenize the text which splits the text on
|
||||
/// whitespaces.
|
||||
#[must_use]
|
||||
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The tokenizer
|
||||
/// function is used to tokenize the text.
|
||||
pub fn from_strings_with_tokenizer(
|
||||
original: &'a str,
|
||||
updated: TextWithCursors<'a>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Self {
|
||||
let original_tokens = (tokenizer)(original);
|
||||
let updated_tokens = (tokenizer)(&updated.text);
|
||||
|
||||
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
|
||||
|
||||
Self::new(
|
||||
original,
|
||||
cook_operations(elongate_operations(diff)).collect(),
|
||||
updated.cursors,
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a new `EditedText` with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be
|
||||
/// applied. The operations must not overlap.
|
||||
fn new(
|
||||
text: &'a str,
|
||||
operations: Vec<OrderedOperation<T>>,
|
||||
mut cursors: Vec<CursorPosition>,
|
||||
) -> Self {
|
||||
operations
|
||||
.iter()
|
||||
.zip(operations.iter().skip(1))
|
||||
.for_each(|(previous, next)| {
|
||||
debug_assert!(
|
||||
previous.operation.start_index() <= next.operation.start_index(),
|
||||
"{} must not come before {} yet it does",
|
||||
previous.operation,
|
||||
next.operation
|
||||
);
|
||||
});
|
||||
|
||||
cursors.sort_by_key(|cursor| cursor.char_index);
|
||||
|
||||
Self {
|
||||
text,
|
||||
operations,
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn merge(self, other: Self) -> Self {
|
||||
debug_assert_eq!(
|
||||
self.text, other.text,
|
||||
"`EditedText`-s must be derived from the same text to be mergable"
|
||||
);
|
||||
|
||||
let mut left_merge_context = MergeContext::default();
|
||||
let mut right_merge_context = MergeContext::default();
|
||||
|
||||
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
|
||||
let mut left_cursors = self.cursors.into_iter().peekable();
|
||||
let mut right_cursors = other.cursors.into_iter().peekable();
|
||||
|
||||
let mut merged_operations: Vec<OrderedOperation<T>> =
|
||||
Vec::with_capacity(self.operations.len() + other.operations.len());
|
||||
|
||||
let mut left_iter = self.operations.into_iter();
|
||||
let mut right_iter = other.operations.into_iter();
|
||||
|
||||
let mut maybe_left_op = left_iter.next();
|
||||
let mut maybe_right_op = right_iter.next();
|
||||
|
||||
loop {
|
||||
let (side, OrderedOperation { operation, order }) =
|
||||
match (maybe_left_op.clone(), maybe_right_op.clone()) {
|
||||
(Some(left_op), Some(right_op)) => {
|
||||
if left_op < right_op {
|
||||
(Side::Left, left_op)
|
||||
} else {
|
||||
(Side::Right, right_op)
|
||||
}
|
||||
}
|
||||
|
||||
(Some(left_op), None) => (Side::Left, left_op),
|
||||
(None, Some(right_op)) => (Side::Right, right_op),
|
||||
(None, None) => break,
|
||||
};
|
||||
|
||||
if side == Side::Left {
|
||||
maybe_left_op = left_iter.next();
|
||||
} else {
|
||||
maybe_right_op = right_iter.next();
|
||||
}
|
||||
|
||||
let original_start = operation.start_index() as i64;
|
||||
let original_end = operation.end_index();
|
||||
let original_length = operation.len() as i64;
|
||||
|
||||
let result = match side {
|
||||
Side::Left => operation.merge_operations_with_context(
|
||||
&mut right_merge_context,
|
||||
&mut left_merge_context,
|
||||
),
|
||||
Side::Right => operation.merge_operations_with_context(
|
||||
&mut left_merge_context,
|
||||
&mut right_merge_context,
|
||||
),
|
||||
};
|
||||
|
||||
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result {
|
||||
let shift =
|
||||
op.start_index() as i64 - original_start + op.len() as i64 - original_length;
|
||||
match side {
|
||||
Side::Left => {
|
||||
while let Some(cursor) =
|
||||
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
Side::Right => {
|
||||
while let Some(cursor) =
|
||||
right_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
merged_operations.extend(result.into_iter().map(|op| OrderedOperation {
|
||||
order,
|
||||
operation: op,
|
||||
}));
|
||||
}
|
||||
|
||||
let last_index = merged_operations
|
||||
.iter()
|
||||
.filter(|operation| {
|
||||
matches!(
|
||||
operation.operation,
|
||||
Operation::Insert { .. } | Operation::Equal { .. }
|
||||
)
|
||||
})
|
||||
.next_back()
|
||||
.map_or(0, |op| op.operation.end_index());
|
||||
|
||||
for cursor in left_cursors.chain(right_cursors) {
|
||||
merged_cursors.push(cursor.with_index(last_index));
|
||||
}
|
||||
|
||||
Self::new(self.text, merged_operations, merged_cursors)
|
||||
}
|
||||
|
||||
/// Apply the operations to the text and return the resulting text.
|
||||
#[must_use]
|
||||
pub fn apply(&self) -> String {
|
||||
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
|
||||
|
||||
for OrderedOperation { operation, .. } in &self.operations {
|
||||
builder = operation.apply(builder);
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::env;
|
||||
|
||||
use insta::assert_debug_snapshot;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_calculate_operations() {
|
||||
let left = "hello world! How are you? Adam";
|
||||
let right = "Hello, my friend! How are you doing? Albert";
|
||||
|
||||
let operations = EditedText::from_strings(left, right.into());
|
||||
|
||||
insta::assert_debug_snapshot!(operations);
|
||||
|
||||
let new_right = operations.apply();
|
||||
assert_eq!(new_right.to_string(), right);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_operations_with_no_diff() {
|
||||
let text = "hello world!";
|
||||
|
||||
let operations = EditedText::from_strings(text, text.into());
|
||||
|
||||
assert_debug_snapshot!(operations);
|
||||
|
||||
let new_right = operations.apply();
|
||||
assert_eq!(new_right.to_string(), text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_operations_with_insert() {
|
||||
let original = "hello world! ...";
|
||||
let left = "Hello world! I'm Andras.";
|
||||
let right = "Hello world! How are you?";
|
||||
let expected = "Hello world! How are you? I'm Andras.";
|
||||
|
||||
let operations_1 = EditedText::from_strings(original, left.into());
|
||||
let operations_2 = EditedText::from_strings(original, right.into());
|
||||
|
||||
let operations = operations_1.merge(operations_2);
|
||||
assert_eq!(operations.apply(), expected);
|
||||
}
|
||||
}
|
||||
73
src/operation_transformation/merge_context.rs
Normal file
73
src/operation_transformation/merge_context.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
use core::fmt::Debug;
|
||||
|
||||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
last_operation: Option<Operation<T>>,
|
||||
pub shift: i64,
|
||||
}
|
||||
|
||||
impl<T> Default for MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn default() -> Self {
|
||||
MergeContext {
|
||||
last_operation: None,
|
||||
shift: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }
|
||||
|
||||
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
|
||||
self.last_operation = operation;
|
||||
}
|
||||
|
||||
/// Replace the last delete operation (if there was one) with a new one
|
||||
/// while applying it to the `shift` in case the last operation
|
||||
/// was a delete.
|
||||
pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
|
||||
if let Some(Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
}) = self.last_operation.take()
|
||||
{
|
||||
self.shift -= deleted_character_count as i64;
|
||||
}
|
||||
|
||||
self.last_operation = operation;
|
||||
}
|
||||
|
||||
/// Remove the last operation (if there was one) in case it is behind the
|
||||
/// threshold operation. This updates the `shift` in case the last operation
|
||||
/// was a delete.
|
||||
pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
|
||||
if let Some(last_operation) = self.last_operation.as_ref() {
|
||||
if let Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} = last_operation
|
||||
{
|
||||
if threshold_index + self.shift > last_operation.end_index() as i64 {
|
||||
self.shift -= *deleted_character_count as i64;
|
||||
self.last_operation = None;
|
||||
}
|
||||
} else if let Operation::Insert { .. } = last_operation
|
||||
&& threshold_index + self.shift - last_operation.len() as i64
|
||||
> last_operation.end_index() as i64
|
||||
{
|
||||
self.last_operation = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
513
src/operation_transformation/operation.rs
Normal file
513
src/operation_transformation/operation.rs
Normal file
|
|
@ -0,0 +1,513 @@
|
|||
use core::fmt::{Debug, Display};
|
||||
use std::ops::Range;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::merge_context::MergeContext;
|
||||
use crate::{
|
||||
Token,
|
||||
utils::{
|
||||
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
|
||||
string_builder::StringBuilder,
|
||||
},
|
||||
};
|
||||
|
||||
/// Represents a change that can be applied on a `StringBuilder`.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
Equal {
|
||||
index: usize,
|
||||
length: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: Option<String>,
|
||||
},
|
||||
|
||||
Insert {
|
||||
index: usize,
|
||||
text: Vec<Token<T>>,
|
||||
},
|
||||
|
||||
Delete {
|
||||
index: usize,
|
||||
deleted_character_count: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl<T> Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// Creates an equal operation with the given index.
|
||||
/// This operation is used to indicate that the text at the given index
|
||||
/// is unchanged.
|
||||
pub fn create_equal(index: usize, length: usize) -> Option<Self> {
|
||||
if length == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Equal {
|
||||
index,
|
||||
length: text.chars().count(),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: Some(text),
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates an insert operation with the given index and text.
|
||||
/// If the text is empty (meaning that the operation would be a no-op),
|
||||
/// returns None.
|
||||
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Insert { index, text })
|
||||
}
|
||||
|
||||
/// Creates a delete operation with the given index and number of
|
||||
/// to-be-deleted characters. If the operation would delete 0 (meaning
|
||||
/// that the operation would be a no-op), returns None.
|
||||
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
|
||||
if deleted_character_count == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count: text.chars().count(),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Some(text),
|
||||
})
|
||||
}
|
||||
|
||||
/// Applies the operation to the given `StringBuilder`, returning the
|
||||
/// modified `StringBuilder`.
|
||||
///
|
||||
/// When compiled in debug mode, panics if a delete operation is attempted
|
||||
/// on a range of text that does not match the text to be deleted.
|
||||
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(
|
||||
text.as_ref()
|
||||
.is_none_or(|text| builder.get_slice(self.range()) == *text),
|
||||
"Text which is supposed to be equal does not match the text in the range"
|
||||
);
|
||||
|
||||
return builder;
|
||||
}
|
||||
Operation::Insert { text, .. } => builder.insert(
|
||||
self.start_index(),
|
||||
&text.iter().map(Token::original).collect::<String>(),
|
||||
),
|
||||
Operation::Delete {
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.is_none_or(|text| builder.get_slice(self.range()) == *text),
|
||||
"Text to delete does not match the text in the range"
|
||||
);
|
||||
|
||||
builder.delete(self.range());
|
||||
}
|
||||
}
|
||||
|
||||
builder
|
||||
}
|
||||
|
||||
/// Returns the index of the first character that the operation affects.
|
||||
pub fn start_index(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { index, .. }
|
||||
| Operation::Insert { index, .. }
|
||||
| Operation::Delete { index, .. } => *index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the index of the last character that the operation affects.
|
||||
pub fn end_index(&self) -> usize {
|
||||
debug_assert!(
|
||||
self.len() > 0,
|
||||
" len() must be greater than 0 because operations must be non-empty"
|
||||
);
|
||||
self.start_index() + self.len() - 1
|
||||
}
|
||||
|
||||
/// Returns the range of indices of characters that the operation affects.
|
||||
#[allow(clippy::range_plus_one)]
|
||||
pub fn range(&self) -> Range<usize> { self.start_index()..self.end_index() + 1 }
|
||||
|
||||
/// Returns the number of affected characters. It is always greater than 0
|
||||
/// because empty operations cannot be created.
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { length, .. } => *length,
|
||||
Operation::Insert { text, .. } => text.iter().map(Token::get_original_length).sum(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => *deleted_character_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new operation with the same type and text but with the given
|
||||
/// index.
|
||||
pub fn with_index(self, index: usize) -> Self {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
..
|
||||
} => Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
},
|
||||
Operation::Insert { text, .. } => Operation::Insert { index, text },
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new operation with the same type and text but with the index
|
||||
/// shifted by the given offset. The offset can be negative but the
|
||||
/// resulting index must be non-negative.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// In debug mode, panics if the resulting index is negative.
|
||||
pub fn with_shifted_index(self, offset: i64) -> Self {
|
||||
let index = self.start_index() as i64 + offset;
|
||||
debug_assert!(index >= 0, "Shifted index must be non-negative");
|
||||
|
||||
self.with_index(index as usize)
|
||||
}
|
||||
|
||||
/// Merges the operation with the given context, producing a new operation
|
||||
/// and updating the context. This implements a comples FSM that handles
|
||||
/// the merging of operations in a way that is consistent with the text.
|
||||
/// The contexts are updated in-place.
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge_operations_with_context(
|
||||
self,
|
||||
affecting_context: &mut MergeContext<T>,
|
||||
produced_context: &mut MergeContext<T>,
|
||||
) -> Option<Operation<T>> {
|
||||
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
|
||||
let operation = self.with_shifted_index(affecting_context.shift);
|
||||
|
||||
match (operation, affecting_context.last_operation()) {
|
||||
(operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
|
||||
Some(operation)
|
||||
}
|
||||
|
||||
(
|
||||
Operation::Insert { text, index },
|
||||
Some(Operation::Insert {
|
||||
text: previous_inserted_text,
|
||||
..
|
||||
}),
|
||||
) => {
|
||||
// In case the current insert's prefix appears in the previously inserted text,
|
||||
// we can trim the current insert to only include the non-overlapping part.
|
||||
// This way, we don't end up duplicating text.
|
||||
let offset_in_tokens =
|
||||
find_longest_prefix_contained_within(previous_inserted_text, &text);
|
||||
let offset_in_length = text
|
||||
.iter()
|
||||
.take(offset_in_tokens)
|
||||
.map(Token::get_original_length)
|
||||
.sum::<usize>();
|
||||
let trimmed_operation =
|
||||
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
|
||||
|
||||
affecting_context.shift -= offset_in_length as i64;
|
||||
produced_context.shift += trimmed_operation
|
||||
.as_ref()
|
||||
.map(Operation::len)
|
||||
.unwrap_or_default() as i64;
|
||||
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
|
||||
|
||||
trimmed_operation
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Delete { .. },
|
||||
None | Some(Operation::Insert { .. } | Operation::Equal { .. }),
|
||||
) => {
|
||||
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
|
||||
Some(operation)
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Insert { .. },
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
|
||||
|
||||
let moved_operation = operation.with_index(last_delete.start_index());
|
||||
|
||||
affecting_context.replace_last_operation(Operation::create_delete(
|
||||
moved_operation.end_index() + 1,
|
||||
(last_delete.len() as i64 - difference) as usize,
|
||||
));
|
||||
affecting_context.shift -= difference;
|
||||
|
||||
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
|
||||
|
||||
Some(moved_operation)
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Delete { .. },
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
|
||||
|
||||
let updated_delete = Operation::create_delete(
|
||||
last_delete.start_index(),
|
||||
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
|
||||
);
|
||||
|
||||
affecting_context.replace_last_operation(Operation::create_delete(
|
||||
last_delete.start_index(),
|
||||
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
|
||||
));
|
||||
affecting_context.shift -= difference;
|
||||
|
||||
produced_context.consume_and_replace_last_operation(updated_delete.clone());
|
||||
|
||||
updated_delete
|
||||
}
|
||||
(
|
||||
ref operation @ Operation::Equal {
|
||||
length,
|
||||
#[cfg(debug_assertions)]
|
||||
ref text,
|
||||
..
|
||||
},
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let overlap = (length as i64)
|
||||
.min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
let result = text.as_ref().map_or_else(
|
||||
|| {
|
||||
Operation::create_equal(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
(length as i64 - overlap) as usize,
|
||||
)
|
||||
},
|
||||
|text| {
|
||||
Operation::create_equal_with_text(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
text.chars().skip(overlap as usize).collect::<String>(),
|
||||
)
|
||||
},
|
||||
);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
let result = Operation::create_equal(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
(length as i64 - overlap) as usize,
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
(operation @ Operation::Equal { .. }, _) => Some(operation),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<equal {} from index {}>",
|
||||
text.as_ref()
|
||||
.map(|text| format!("'{text}'"))
|
||||
.unwrap_or(format!("{length} characters")),
|
||||
index
|
||||
)?;
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(f, "<equal {length} from index {index}>")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Operation::Insert { index, text } => {
|
||||
write!(
|
||||
f,
|
||||
"<insert '{}' from index {}>",
|
||||
text.iter().map(Token::original).collect::<String>(),
|
||||
index
|
||||
)
|
||||
}
|
||||
Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<delete {} from index {}>",
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.map(|text| format!("'{text}'"))
|
||||
.unwrap_or(format!("{deleted_character_count} characters")),
|
||||
index
|
||||
)?;
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(
|
||||
f,
|
||||
"<delete {deleted_character_count} characters from index {index}>",
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Debug for Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "Shifted index must be non-negative")]
|
||||
fn test_shifting_error() {
|
||||
insta::assert_debug_snapshot!(
|
||||
Operation::create_insert(1, vec!["hi".into()])
|
||||
.unwrap()
|
||||
.with_shifted_index(-2)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_delete_with_create() {
|
||||
let builder = StringBuilder::new("hello world");
|
||||
let operation = Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();
|
||||
|
||||
assert_eq!(operation.apply(builder).build(), "hello");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_insert() {
|
||||
let builder = StringBuilder::new("hello");
|
||||
let operation = Operation::create_insert(5, vec![" my friend".into()]).unwrap();
|
||||
|
||||
assert_eq!(operation.apply(builder).build(), "hello my friend");
|
||||
}
|
||||
}
|
||||
48
src/operation_transformation/ordered_operation.rs
Normal file
48
src/operation_transformation/ordered_operation.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub order: usize,
|
||||
pub operation: Operation<T>,
|
||||
}
|
||||
|
||||
impl<T> OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn get_sort_key(&self) -> (usize, usize, String) {
|
||||
(
|
||||
self.order,
|
||||
self.operation.start_index(),
|
||||
// Make sure that the ordering is deterministic regardless of which text
|
||||
// is left or right.
|
||||
match &self.operation {
|
||||
Operation::Equal { index, .. } => index.to_string(),
|
||||
Operation::Insert { text, .. } => text
|
||||
.iter()
|
||||
.map(crate::tokenizer::token::Token::original)
|
||||
.collect::<String>(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => deleted_character_count.to_string(),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> PartialOrd for OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.get_sort_key().partial_cmp(&other.get_sort_key())
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
---
|
||||
source: reconcile/src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world! How are you? Adam",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <insert 'Hello, my friend!' from index 0>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <delete 'hello world!' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 12,
|
||||
operation: <equal ' ' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 13,
|
||||
operation: <equal 'How' from index 18>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 16,
|
||||
operation: <equal ' ' from index 21>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 17,
|
||||
operation: <equal 'are' from index 22>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 20,
|
||||
operation: <insert ' you doing? Albert' from index 25>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 20,
|
||||
operation: <delete ' you? Adam' from index 43>,
|
||||
},
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
source: reconcile/src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world!",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <equal 'hello' from index 0>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 5,
|
||||
operation: <equal ' ' from index 5>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 6,
|
||||
operation: <equal 'world!' from index 6>,
|
||||
},
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
---
|
||||
source: reconcile/src/operations/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world! How are you? Adam",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 21,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 26,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
---
|
||||
source: reconcile/src/operations/operation_sequence.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 21,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 26,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
2
src/operation_transformation/utils.rs
Normal file
2
src/operation_transformation/utils.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod cook_operations;
|
||||
pub mod elongate_operations;
|
||||
55
src/operation_transformation/utils/cook_operations.rs
Normal file
55
src/operation_transformation/utils/cook_operations.rs
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
use crate::{
|
||||
diffs::raw_operation::RawOperation,
|
||||
operation_transformation::{Operation, ordered_operation::OrderedOperation},
|
||||
};
|
||||
|
||||
/// Turn raw operations into ordered operations while keeping track of old & new
|
||||
/// indexes.
|
||||
pub fn cook_operations<I, T>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let mut new_index = 0; // this is the start index of the operation on the new text
|
||||
let mut order = 0; // this is the start index of the operation on the original text
|
||||
|
||||
raw_operations.into_iter().filter_map(move |raw_operation| {
|
||||
let length = raw_operation.original_text_length();
|
||||
|
||||
match raw_operation {
|
||||
RawOperation::Equal(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_equal_with_text(new_index, raw_operation.get_original_text())
|
||||
} else {
|
||||
Operation::create_equal(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Insert(tokens) => {
|
||||
let op = Operation::create_insert(new_index, tokens)
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_delete_with_text(new_index, raw_operation.get_original_text())
|
||||
} else {
|
||||
Operation::create_delete(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
127
src/operation_transformation/utils/elongate_operations.rs
Normal file
127
src/operation_transformation/utils/elongate_operations.rs
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
use core::iter;
|
||||
|
||||
use crate::diffs::raw_operation::RawOperation;
|
||||
|
||||
/// Elongates the operations by merging adjacent insertions and deletions that
|
||||
/// can be joined. This makes the subsequent merging of operations more
|
||||
/// intuitive.
|
||||
pub fn elongate_operations<I, T>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
// This might look bad, but this makes sense. The inserts and deltes can be
|
||||
// interleaved, such as: IDIDID and we need to turn this into IIIDDD.
|
||||
// So we need to keep track of both the last insert and delete operations, not
|
||||
// just the last one.
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => match maybe_previous_insert.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_insert = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_insert = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Delete(..) => match maybe_previous_delete.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_delete = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_delete = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Equal(..) => Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(iter::once(next)),
|
||||
) as Box<dyn Iterator<Item = RawOperation<T>>>,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
|
||||
// use super::*;
|
||||
|
||||
// #[test]
|
||||
// fn test_elongate_operations_empty() {
|
||||
// let operations: Vec<RawOperation<()>> = vec![];
|
||||
// let result = elongate_operations(operations);
|
||||
// assert_eq!(result, vec![]);
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_elongate_operations_single_operation() {
|
||||
// let operations = vec![RawOperation::Insert(vec!["test".into()])];
|
||||
// let result = elongate_operations(operations);
|
||||
// assert_eq!(result.len(), 1);
|
||||
// assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_elongate_operations_interleaved() {
|
||||
// let operations = vec![
|
||||
// RawOperation::Insert(vec!["a".into()]),
|
||||
// RawOperation::Delete(vec!["b".into()]),
|
||||
// RawOperation::Insert(vec!["c".into()]),
|
||||
// RawOperation::Delete(vec!["d".into()]),
|
||||
// ];
|
||||
// let result = elongate_operations(operations);
|
||||
// assert_eq!(result.len(), 2);
|
||||
// assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
// assert!(matches!(result[1], RawOperation::Delete(_)));
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_elongate_operations_with_equal() {
|
||||
// let operations = vec![
|
||||
// RawOperation::Equal(vec!["a".into()]),
|
||||
// RawOperation::Equal(vec!["b".into()]),
|
||||
// RawOperation::Insert(vec!["c".into()]),
|
||||
// RawOperation::Insert(vec!["d".into()]),
|
||||
// ];
|
||||
// let result = elongate_operations(operations);
|
||||
// assert_eq!(result.len(), 2);
|
||||
// assert!(matches!(result[0], RawOperation::Equal(_)));
|
||||
// assert!(matches!(result[1], RawOperation::Insert(_)));
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn test_elongate_operations_mixed_sequence() {
|
||||
// let operations = vec![
|
||||
// RawOperation::Insert(vec!["a".into()]),
|
||||
// RawOperation::Equal(vec!["b".into()]),
|
||||
// RawOperation::Delete(vec!["c".into()]),
|
||||
// RawOperation::Equal(vec!["d".into()]),
|
||||
// ];
|
||||
// let result = elongate_operations(operations);
|
||||
// assert_eq!(result.len(), 4);
|
||||
// assert!(matches!(result[0], RawOperation::Insert(_)));
|
||||
// assert!(matches!(result[1], RawOperation::Equal(_)));
|
||||
// assert!(matches!(result[2], RawOperation::Delete(_)));
|
||||
// assert!(matches!(result[3], RawOperation::Equal(_)));
|
||||
// }
|
||||
// }
|
||||
7
src/tokenizer.rs
Normal file
7
src/tokenizer.rs
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
use token::Token;
|
||||
|
||||
pub mod token;
|
||||
pub mod word_tokenizer;
|
||||
|
||||
/// A trait for tokenizers that take a string and return a list of tokens.
|
||||
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[]
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" what? \")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " what?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "what?",
|
||||
original: "what?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " hello,",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " \nwhere",
|
||||
original: " \n",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " are",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " you?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
},
|
||||
Token {
|
||||
normalised: " \n",
|
||||
original: " \n",
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"Hi there!\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "Hi",
|
||||
original: "Hi",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " there!",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "there!",
|
||||
original: "there!",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
68
src/tokenizer/token.rs
Normal file
68
src/tokenizer/token.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A token is a string that has been normalised in some way.
|
||||
///
|
||||
/// A token consists of the normalised form is used for comparison, and the
|
||||
/// original form used for subsequently applying `Operation`-s to a text
|
||||
/// document.
|
||||
///
|
||||
/// It's UTF-8 compatible.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// The normalised form of the token used deriving the diff.
|
||||
normalised: T,
|
||||
|
||||
/// The original string, that should be inserted or deleted in the document.
|
||||
original: String,
|
||||
|
||||
/// Whether the token is semantically joinable with the previous token.
|
||||
pub is_left_joinable: bool,
|
||||
|
||||
/// Whether the token is semantically joinable with the next token.
|
||||
pub is_right_joinable: bool,
|
||||
}
|
||||
|
||||
/// Trivial implementation of Token when the normalised form is the same as the
|
||||
/// original string.
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(text: &str) -> Self { Token::new(text.to_owned(), text.to_owned(), true, true) }
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn new(
|
||||
normalised: T,
|
||||
original: String,
|
||||
is_left_joinable: bool,
|
||||
is_right_joinable: bool,
|
||||
) -> Self {
|
||||
Token {
|
||||
normalised,
|
||||
original,
|
||||
is_left_joinable,
|
||||
is_right_joinable,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn original(&self) -> &str { &self.original }
|
||||
|
||||
pub fn set_normalised(&mut self, normalised: T) { self.normalised = normalised; }
|
||||
|
||||
pub fn normalised(&self) -> &T { &self.normalised }
|
||||
|
||||
pub fn get_original_length(&self) -> usize { self.original.chars().count() }
|
||||
}
|
||||
|
||||
impl<T> PartialEq for Token<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool { self.normalised == other.normalised }
|
||||
}
|
||||
61
src/tokenizer/word_tokenizer.rs
Normal file
61
src/tokenizer/word_tokenizer.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits text on word boundaries creating tokens of alternating words and
|
||||
/// whitespaces with the whitespaces getting unique IDs.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```not_rust
|
||||
/// "Hi there!" -> ["Hi", " ", "there!"]
|
||||
/// ```
|
||||
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
||||
let mut result: Vec<Token<String>> = Vec::new();
|
||||
|
||||
let mut previous_boundary_index = 0;
|
||||
let mut previous_char_is_whitespace = text.chars().next().is_none_or(char::is_whitespace);
|
||||
|
||||
for (i, c) in text.char_indices() {
|
||||
let is_current_char_whitespace = c.is_whitespace();
|
||||
if previous_char_is_whitespace != is_current_char_whitespace {
|
||||
result.push(text[previous_boundary_index..i].into());
|
||||
previous_boundary_index = i;
|
||||
}
|
||||
|
||||
previous_char_is_whitespace = is_current_char_whitespace;
|
||||
}
|
||||
|
||||
if previous_boundary_index < text.len() {
|
||||
result.push(text[previous_boundary_index..].into());
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
return result;
|
||||
}
|
||||
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
let normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
result[i].set_normalised(normalised);
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_with_snapshots() {
|
||||
assert_debug_snapshot!(word_tokenizer("Hi there!"));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(""));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" what? "));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?"));
|
||||
}
|
||||
}
|
||||
5
src/utils.rs
Normal file
5
src/utils.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
pub mod common_prefix_len;
|
||||
pub mod common_suffix_len;
|
||||
pub mod find_longest_prefix_contained_within;
|
||||
pub mod side;
|
||||
pub mod string_builder;
|
||||
47
src/utils/common_prefix_len.rs
Normal file
47
src/utils/common_prefix_len.rs
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
use core::ops::{Index, Range};
|
||||
|
||||
/// Given two lookups and ranges calculates the length of the common prefix.
|
||||
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
|
||||
pub fn common_prefix_len<Old, New>(
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> usize
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
new_range
|
||||
.zip(old_range)
|
||||
.take_while(|x| new[x.0] == old[x.1])
|
||||
.count()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_common_prefix_len() {
|
||||
assert_eq!(
|
||||
common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
|
||||
0
|
||||
);
|
||||
assert_eq!(
|
||||
common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
|
||||
7
|
||||
);
|
||||
assert_eq!(
|
||||
common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
|
||||
0
|
||||
);
|
||||
assert_eq!(
|
||||
common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
|
||||
4
|
||||
);
|
||||
}
|
||||
}
|
||||
48
src/utils/common_suffix_len.rs
Normal file
48
src/utils/common_suffix_len.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
use core::ops::{Index, Range};
|
||||
|
||||
/// Given two lookups and ranges calculates the length of common suffix.
|
||||
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
|
||||
pub fn common_suffix_len<Old, New>(
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> usize
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
new_range
|
||||
.rev()
|
||||
.zip(old_range.rev())
|
||||
.take_while(|x| new[x.0] == old[x.1])
|
||||
.count()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_common_suffix_len() {
|
||||
assert_eq!(
|
||||
common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
|
||||
0
|
||||
);
|
||||
assert_eq!(
|
||||
common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
|
||||
4
|
||||
);
|
||||
assert_eq!(
|
||||
common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
|
||||
0
|
||||
);
|
||||
assert_eq!(
|
||||
common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
|
||||
2
|
||||
);
|
||||
}
|
||||
}
|
||||
103
src/utils/find_longest_prefix_contained_within.rs
Normal file
103
src/utils/find_longest_prefix_contained_within.rs
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
use crate::Token;
|
||||
|
||||
/// Given two lists of tokens, returns `length` where `old` list somewhere
|
||||
/// within contains the `length` prefix of the `new` list.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [9, 0, 2, 5, 1]
|
||||
/// ```
|
||||
/// > results in an length of 4
|
||||
///
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [0, 2]
|
||||
/// ```
|
||||
/// > results in an length of 2
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [0, 4]
|
||||
/// ```
|
||||
/// > results in an length of 1
|
||||
pub fn find_longest_prefix_contained_within<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let max_possible = new.len().min(old.len());
|
||||
|
||||
for len in (1..=max_possible).rev() {
|
||||
let prefix = &new[..len];
|
||||
if old.windows(len).any(|window| window == prefix) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_common_overlap() {
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(&["".into()], &["".into()]),
|
||||
1
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "b".into(), "c".into()],
|
||||
&["b".into(), "c".into(), "a".into()]
|
||||
),
|
||||
2
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "b".into(), "c".into()],
|
||||
&["b".into(), "c".into()]
|
||||
),
|
||||
2
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "b".into(), "c".into()],
|
||||
&["b".into()]
|
||||
),
|
||||
1
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "b".into(), "c".into(), "b".into(), "a".into()],
|
||||
&["b".into(), "a".into()]
|
||||
),
|
||||
2
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "a".into(), "a".into()],
|
||||
&["a".into(), "b".into(), "c".into()]
|
||||
),
|
||||
1
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
find_longest_prefix_contained_within(
|
||||
&["a".into(), "b".into(), "c".into()],
|
||||
&["d".into(), "e".into(), "a".into()]
|
||||
),
|
||||
0
|
||||
);
|
||||
}
|
||||
}
|
||||
16
src/utils/side.rs
Normal file
16
src/utils/side.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Side {
|
||||
Left,
|
||||
Right,
|
||||
}
|
||||
|
||||
impl Display for Side {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Side::Left => write!(f, "Left"),
|
||||
Side::Right => write!(f, "Right"),
|
||||
}
|
||||
}
|
||||
}
|
||||
111
src/utils/string_builder.rs
Normal file
111
src/utils/string_builder.rs
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
use core::ops::Range;
|
||||
|
||||
/// A helper for building a string in order based on an original string and a
|
||||
/// series of insertions and deletions applied to it. It is safe to use with
|
||||
/// UTF-8 strings as all operations are based on character indices.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StringBuilder<'a> {
|
||||
original: &'a str,
|
||||
last_old_char_index: usize,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
impl StringBuilder<'_> {
|
||||
pub fn new(original: &str) -> StringBuilder<'_> {
|
||||
StringBuilder {
|
||||
original,
|
||||
last_old_char_index: 0,
|
||||
buffer: String::with_capacity(original.len()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a string at the given index after copying the original string up
|
||||
/// to that index from the last insertion or deletion.
|
||||
pub fn insert(&mut self, from: usize, text: &str) {
|
||||
self.copy_until(from);
|
||||
self.buffer.push_str(text);
|
||||
}
|
||||
|
||||
/// Delete a string at the given index after copying the original string up
|
||||
/// to that index from the last insertion or deletion.
|
||||
pub fn delete(&mut self, range: core::ops::Range<usize>) {
|
||||
self.copy_until(range.start);
|
||||
self.last_old_char_index += range.len();
|
||||
}
|
||||
|
||||
fn copy_until(&mut self, index: usize) {
|
||||
let current_char_count = self.buffer.chars().count();
|
||||
debug_assert!(
|
||||
index >= current_char_count,
|
||||
"String builder only support building in order"
|
||||
);
|
||||
|
||||
let jump = index - current_char_count;
|
||||
|
||||
self.buffer.push_str(
|
||||
&self
|
||||
.original
|
||||
.chars()
|
||||
.skip(self.last_old_char_index)
|
||||
.take(jump)
|
||||
.collect::<String>(),
|
||||
);
|
||||
self.last_old_char_index += jump;
|
||||
}
|
||||
|
||||
/// Finish building the string after copying the remaining original string
|
||||
/// since the last insertion or deletion.
|
||||
pub fn build(mut self) -> String {
|
||||
self.buffer.push_str(
|
||||
&self
|
||||
.original
|
||||
.chars()
|
||||
.skip(self.last_old_char_index)
|
||||
.collect::<String>(),
|
||||
);
|
||||
|
||||
self.buffer
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_slice(&self, range: Range<usize>) -> String {
|
||||
let result = self
|
||||
.buffer
|
||||
.chars()
|
||||
.chain(self.original.chars().skip(self.last_old_char_index))
|
||||
.skip(range.start)
|
||||
.take(range.end - range.start)
|
||||
.collect::<String>();
|
||||
|
||||
debug_assert_eq!(result.chars().count(), range.len(), "Range out of bounds",);
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_string_builder() {
|
||||
let original = "aaa bbb ccc";
|
||||
let mut builder = StringBuilder::new(original);
|
||||
|
||||
builder.insert(0, "ddd ");
|
||||
builder.delete(4..8);
|
||||
builder.insert(11, " eee");
|
||||
|
||||
assert_eq!(builder.build(), "ddd bbb ccc eee");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_builder2() {
|
||||
let original = "abcde";
|
||||
let mut builder = StringBuilder::new(original);
|
||||
|
||||
builder.delete(1..4);
|
||||
|
||||
assert_eq!(builder.build(), "ae");
|
||||
}
|
||||
}
|
||||
2
src/wasm.rs
Normal file
2
src/wasm.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod cursor;
|
||||
pub mod lib;
|
||||
88
src/wasm/cursor.rs
Normal file
88
src/wasm/cursor.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// Wrapper type to expose `TextWithCursors` to JS.
|
||||
#[wasm_bindgen]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct JsTextWithCursors {
|
||||
text: String,
|
||||
cursors: Vec<JsCursorPosition>,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl JsTextWithCursors {
|
||||
#[wasm_bindgen(constructor)]
|
||||
#[must_use]
|
||||
pub fn new(text: String, cursors: Vec<JsCursorPosition>) -> Self { Self { text, cursors } }
|
||||
|
||||
#[must_use]
|
||||
pub fn text(&self) -> String { self.text.clone() }
|
||||
|
||||
#[must_use]
|
||||
pub fn cursors(&self) -> Vec<JsCursorPosition> { self.cursors.clone() }
|
||||
}
|
||||
|
||||
impl From<JsTextWithCursors> for crate::TextWithCursors<'_> {
|
||||
fn from(owned: JsTextWithCursors) -> Self {
|
||||
crate::TextWithCursors::new_owned(
|
||||
owned.text.to_string(),
|
||||
owned
|
||||
.cursors
|
||||
.into_iter()
|
||||
.map(std::convert::Into::into)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::TextWithCursors<'_>> for JsTextWithCursors {
|
||||
fn from(text_with_cursors: crate::TextWithCursors<'_>) -> Self {
|
||||
JsTextWithCursors {
|
||||
text: text_with_cursors.text.into_owned(),
|
||||
cursors: text_with_cursors
|
||||
.cursors
|
||||
.into_iter()
|
||||
.map(std::convert::Into::into)
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type to expose `CursorPosition` to JS.
|
||||
#[wasm_bindgen]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct JsCursorPosition {
|
||||
id: usize,
|
||||
char_index: usize,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl JsCursorPosition {
|
||||
#[wasm_bindgen(constructor)]
|
||||
#[must_use]
|
||||
pub fn new(id: usize, char_index: usize) -> Self { Self { id, char_index } }
|
||||
|
||||
#[must_use]
|
||||
pub fn id(&self) -> usize { self.id }
|
||||
|
||||
#[wasm_bindgen(js_name = characterPosition)]
|
||||
#[must_use]
|
||||
pub fn char_index(&self) -> usize { self.char_index }
|
||||
}
|
||||
|
||||
impl From<JsCursorPosition> for crate::CursorPosition {
|
||||
fn from(owned: JsCursorPosition) -> Self {
|
||||
crate::CursorPosition {
|
||||
id: owned.id,
|
||||
char_index: owned.char_index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::CursorPosition> for JsCursorPosition {
|
||||
fn from(cursor: crate::CursorPosition) -> Self {
|
||||
JsCursorPosition {
|
||||
id: cursor.id,
|
||||
char_index: cursor.char_index,
|
||||
}
|
||||
}
|
||||
}
|
||||
105
src/wasm/lib.rs
Normal file
105
src/wasm/lib.rs
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
//! This crate provides utilities for easily communicating between backend &
|
||||
//! frontend and ensuring the same logic for encoding and decoding binary data,
|
||||
//! and 3-way-merging documents in Rust and JavaScript.
|
||||
//!
|
||||
//! The crate is designed to be used as a Rust library and as a
|
||||
//! TypeScript/JavaScript package through WebAssembly (WASM).
|
||||
//!
|
||||
//! # Modules
|
||||
//!
|
||||
//! - `errors`: Contains error types used in this crate.
|
||||
|
||||
use core::str;
|
||||
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
use crate::wasm::cursor::JsTextWithCursors;
|
||||
|
||||
/// Merge two documents with a common parent. Relies on `reconcile::reconcile`
|
||||
/// for texts and returns the right document as-is if either of the updated
|
||||
/// documents is binary.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `parent`: The common parent document.
|
||||
/// - `left`: The left document updated by one user.
|
||||
/// - `right`: The right document updated by another user.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// The merged document.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If any of the input documents are not valid UTF-8 strings.
|
||||
#[wasm_bindgen]
|
||||
#[must_use]
|
||||
pub fn merge(parent: &[u8], left: &[u8], right: &[u8]) -> Vec<u8> {
|
||||
set_panic_hook();
|
||||
|
||||
if is_binary(parent) || is_binary(left) || is_binary(right) {
|
||||
right.to_vec()
|
||||
} else {
|
||||
crate::reconcile(
|
||||
str::from_utf8(parent).expect("parent must be valid UTF-8 because it's not binary"),
|
||||
str::from_utf8(left).expect("left must be valid UTF-8 because it's not binary"),
|
||||
str::from_utf8(right).expect("right must be valid UTF-8 because it's not binary"),
|
||||
)
|
||||
.into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
/// WASM wrapper around `crate::reconcile` for merging text.
|
||||
#[wasm_bindgen(js_name = mergeText)]
|
||||
#[must_use]
|
||||
pub fn merge_text(parent: &str, left: &str, right: &str) -> String {
|
||||
set_panic_hook();
|
||||
|
||||
crate::reconcile(parent, left, right)
|
||||
}
|
||||
|
||||
/// WASM wrapper around `reconcile::reconcile_with_cursors` for merging text.
|
||||
#[wasm_bindgen(js_name = mergeTextWithCursors)]
|
||||
#[must_use]
|
||||
pub fn merge_text_with_cursors(
|
||||
parent: &str,
|
||||
left: JsTextWithCursors,
|
||||
right: JsTextWithCursors,
|
||||
) -> JsTextWithCursors {
|
||||
set_panic_hook();
|
||||
|
||||
crate::reconcile_with_cursors(parent, left.into(), right.into()).into()
|
||||
}
|
||||
|
||||
/// Heuristically determine if the given data is a binary or a text file's
|
||||
/// content.
|
||||
#[wasm_bindgen(js_name = isBinary)]
|
||||
#[must_use]
|
||||
pub fn is_binary(data: &[u8]) -> bool {
|
||||
set_panic_hook();
|
||||
|
||||
if data.contains(&0) {
|
||||
// Even though the NUL character is valid in UTF-8, it's highly suspicious in
|
||||
// human-readable text.
|
||||
return true;
|
||||
}
|
||||
|
||||
std::str::from_utf8(data).is_err()
|
||||
}
|
||||
|
||||
/// We don't want to support merging structured data like JSON, YAML, etc.
|
||||
#[wasm_bindgen(js_name = isFileTypeMergable)]
|
||||
#[must_use]
|
||||
pub fn is_file_type_mergable(path_or_file_name: &str) -> bool {
|
||||
set_panic_hook();
|
||||
|
||||
let file_extension = path_or_file_name.split('.').next_back().unwrap_or_default();
|
||||
|
||||
matches!(file_extension.to_lowercase().as_str(), "md" | "txt")
|
||||
}
|
||||
|
||||
fn set_panic_hook() {
|
||||
// https://github.com/rustwasm/console_error_panic_hook#readme
|
||||
#[cfg(feature = "console_error_panic_hook")]
|
||||
console_error_panic_hook::set_once();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue