wip
This commit is contained in:
parent
a471bf6855
commit
7f6973389f
21 changed files with 30682 additions and 236 deletions
18
backend/reconcile/Cargo.toml
Normal file
18
backend/reconcile/Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "reconcile"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
ropey = { version = "1.6.1", default-features = false, features = ["simd"] } #
|
||||
thiserror = {workspace = true}
|
||||
log = {workspace = true}
|
||||
serde = { version = "1.0.215", optional = true }
|
||||
|
||||
[features]
|
||||
serde = [ "dep:serde" ]
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.41.1"
|
||||
itertools = "0.13.0"
|
||||
pretty_assertions = "1.4.1"
|
||||
165
backend/reconcile/src/diffs/lcs.rs
Normal file
165
backend/reconcile/src/diffs/lcs.rs
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
//! LCS diff algorithm.
|
||||
//!
|
||||
//! * time: `O((NM)D log (M)D)`
|
||||
//! * space `O(MN)`
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::tokenizer::token::Token;
|
||||
|
||||
use super::raw_operation::RawOperation;
|
||||
use super::utils::{common_prefix_len, common_suffix_len};
|
||||
|
||||
/// LCS diff algorithm.
|
||||
/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/lcs.rs
|
||||
pub fn diff(old: &[Token], new: &[Token]) -> Vec<RawOperation> {
|
||||
let common_prefix_len = common_prefix_len(old, 0..old.len(), new, 0..new.len());
|
||||
let common_suffix_len = common_suffix_len(
|
||||
old,
|
||||
common_prefix_len..old.len(),
|
||||
new,
|
||||
common_prefix_len..new.len(),
|
||||
);
|
||||
|
||||
let maybe_table = make_table(
|
||||
old,
|
||||
common_prefix_len..(old.len() - common_suffix_len),
|
||||
new,
|
||||
common_prefix_len..(new.len() - common_suffix_len),
|
||||
);
|
||||
let mut old_idx = 0;
|
||||
let mut new_idx = 0;
|
||||
let new_len = new.len() - common_prefix_len - common_suffix_len;
|
||||
let old_len = old.len() - common_prefix_len - common_suffix_len;
|
||||
|
||||
let mut result: Vec<RawOperation> = Vec::new();
|
||||
if common_prefix_len > 0 {
|
||||
result.push(RawOperation::Equal(old[0..common_prefix_len].to_vec()));
|
||||
}
|
||||
|
||||
if let Some(table) = maybe_table {
|
||||
while new_idx < new_len && old_idx < old_len {
|
||||
let old_orig_idx = common_prefix_len + old_idx;
|
||||
let new_orig_idx = common_prefix_len + new_idx;
|
||||
|
||||
if new[new_orig_idx] == old[old_orig_idx] {
|
||||
result.push(RawOperation::Equal(vec![old[old_orig_idx].clone()]));
|
||||
old_idx += 1;
|
||||
new_idx += 1;
|
||||
} else if table.get(&(new_idx, old_idx + 1)).unwrap_or(&0)
|
||||
>= table.get(&(new_idx + 1, old_idx)).unwrap_or(&0)
|
||||
{
|
||||
result.push(RawOperation::Delete(vec![old[old_orig_idx].clone()]));
|
||||
old_idx += 1;
|
||||
} else {
|
||||
result.push(RawOperation::Insert(vec![new[new_orig_idx].clone()]));
|
||||
new_idx += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let old_orig_idx = common_prefix_len + old_idx;
|
||||
let new_orig_idx = common_prefix_len + new_idx;
|
||||
|
||||
result.push(RawOperation::Delete(
|
||||
old[old_orig_idx..old_orig_idx + old_len].to_vec(),
|
||||
));
|
||||
result.push(RawOperation::Insert(
|
||||
new[new_orig_idx..new_orig_idx + new_len].to_vec(),
|
||||
));
|
||||
}
|
||||
|
||||
if old_idx < old_len {
|
||||
result.push(RawOperation::Delete(
|
||||
old[common_prefix_len + old_idx..common_prefix_len + old_len].to_vec(),
|
||||
));
|
||||
old_idx += old_len - old_idx;
|
||||
}
|
||||
|
||||
if new_idx < new_len {
|
||||
result.push(RawOperation::Insert(
|
||||
new[common_prefix_len + new_idx..common_prefix_len + new_len].to_vec(),
|
||||
));
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
result.push(RawOperation::Equal(
|
||||
old[old_len + common_prefix_len..old_len + common_prefix_len + common_suffix_len]
|
||||
.to_vec(),
|
||||
));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Builds the sparse LCS-length table for `old[old_range]` versus `new[new_range]`.
///
/// Keys are `(i, j)` where `i` is an offset into `new_range` and `j` an offset into
/// `old_range`. The value is the length of the longest common subsequence of
/// `new[new_range.start + i..new_range.end]` and
/// `old[old_range.start + j..old_range.end]`. Entries whose value would be `0`
/// are not stored, so a missing key means `0`.
///
/// The function currently always returns `Some`.
fn make_table<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Option<BTreeMap<(usize, usize), u32>>
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let old_len = old_range.len();
    let new_len = new_range.len();
    let mut table: BTreeMap<(usize, usize), u32> = BTreeMap::new();

    // Fill from the bottom-right corner so that (i + 1, _) and (_, j + 1) are
    // already known when (i, j) is computed.
    for i in (0..new_len).rev() {
        for j in (0..old_len).rev() {
            // Table keys are range-relative, but the sequences must be read at
            // their absolute positions inside the requested ranges.
            let val = if new[new_range.start + i] == old[old_range.start + j] {
                table.get(&(i + 1, j + 1)).copied().unwrap_or(0) + 1
            } else {
                let skip_new = table.get(&(i + 1, j)).copied().unwrap_or(0);
                let skip_old = table.get(&(i, j + 1)).copied().unwrap_or(0);
                skip_new.max(skip_old)
            };
            if val > 0 {
                table.insert((i, j), val);
            }
        }
    }

    Some(table)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::collections::BTreeMap;

    /// The table only stores non-zero LCS lengths, keyed by `(new index, old index)`.
    #[test]
    fn test_table() {
        let old = vec![2, 3];
        let new = vec![0, 1, 2];

        let table = make_table(&old, 0..2, &new, 0..3).unwrap();

        let expected = BTreeMap::from([((1, 0), 1), ((0, 0), 1), ((2, 0), 1)]);
        assert_eq!(table, expected);
    }

    /// Diffs where at least one side is empty.
    #[test]
    fn test_empty_examples() {
        let token_a = || Token::new("a".to_string(), "a".to_string());

        assert_eq!(diff(&[], &[]), vec![]);
        assert_eq!(
            diff(&[token_a()], &[]),
            vec![RawOperation::Delete(vec![token_a()])]
        );
        assert_eq!(
            diff(&[], &[token_a()]),
            vec![RawOperation::Insert(vec![token_a()])]
        );
    }
}
|
||||
4
backend/reconcile/src/diffs/mod.rs
Normal file
4
backend/reconcile/src/diffs/mod.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
pub mod lcs;
|
||||
pub mod myers;
|
||||
pub mod raw_operation;
|
||||
mod utils;
|
||||
310
backend/reconcile/src/diffs/myers.rs
Normal file
310
backend/reconcile/src/diffs/myers.rs
Normal file
|
|
@ -0,0 +1,310 @@
|
|||
//! Taken from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs
|
||||
//! Myers' diff algorithm.
|
||||
//!
|
||||
//! * time: `O((N+M)D)`
|
||||
//! * space `O(N+M)`
|
||||
//!
|
||||
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
|
||||
//! describing it.
|
||||
//!
|
||||
//! The implementation of this algorithm is based on the implementation by
|
||||
//! Brandon Williams.
|
||||
//!
|
||||
//! # Heuristics
|
||||
//!
|
||||
//! At present this implementation of Myers' does not implement any more advanced
|
||||
//! heuristics that would solve some pathological cases. For instance passing two
|
||||
//! large and completely distinct sequences to the algorithm will make it spin
|
||||
//! without making reasonable progress. Currently the only protection in the
|
||||
//! library against this is to pass a deadline to the diffing algorithm.
|
||||
//!
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
|
||||
use std::ops::{Index, IndexMut, Range};
|
||||
use std::time::Instant;
|
||||
use std::vec;
|
||||
|
||||
use crate::tokenizer::token::Token;
|
||||
|
||||
use super::raw_operation::RawOperation;
|
||||
use super::utils::{common_prefix_len, common_suffix_len};
|
||||
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff(old: &[Token], new: &[Token]) -> Vec<RawOperation> {
|
||||
diff_deadline(old, new, None)
|
||||
}
|
||||
|
||||
/// Myers' diff algorithm with deadline.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
///
|
||||
/// This diff is done with an optional deadline that defines the maximal
|
||||
/// execution time permitted before it bails and falls back to an approximation.
|
||||
pub fn diff_deadline(old: &[Token], new: &[Token], deadline: Option<Instant>) -> Vec<RawOperation> {
|
||||
let max_d = max_d(old.len(), new.len());
|
||||
let mut vb = V::new(max_d);
|
||||
let mut vf = V::new(max_d);
|
||||
let mut result: Vec<RawOperation> = vec![];
|
||||
conquer(
|
||||
old,
|
||||
0..old.len(),
|
||||
new,
|
||||
0..new.len(),
|
||||
&mut vf,
|
||||
&mut vb,
|
||||
&mut result,
|
||||
deadline,
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
|
||||
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
|
||||
// and then a possibly empty sequence of diagonal edges called a snake.
|
||||
|
||||
/// `V` holds the endpoints of the furthest reaching `D-paths`. For an endpoint
/// `(x, y)` on diagonal `k` only `x` is stored, because `y` follows from `x - k`.
/// `V[k]` is therefore the `x` of the furthest reaching path on diagonal `k`.
///
/// Diagonals can be negative, so a plain `Vec` cannot be indexed by `k`
/// directly. `V` wraps a `Vec` of `2 * max_d` slots together with an `offset`
/// of `max_d` that is added to `k` to obtain the slot position.
#[derive(Debug)]
struct V {
    offset: isize,
    v: Vec<usize>, // Look into initializing this to -1 and storing isize
}

impl V {
    /// Creates a zero-filled `V` with `2 * max_d` slots and an offset of `max_d`.
    fn new(max_d: usize) -> Self {
        let offset = max_d as isize;
        let v = vec![0; 2 * max_d];
        Self { offset, v }
    }

    /// Number of slots in the backing vector.
    fn len(&self) -> usize {
        self.v.len()
    }

    /// Translates diagonal `k` into a position in the backing vector.
    fn slot(&self, k: isize) -> usize {
        (k + self.offset) as usize
    }
}

impl Index<isize> for V {
    type Output = usize;

    fn index(&self, index: isize) -> &Self::Output {
        let slot = self.slot(index);
        &self.v[slot]
    }
}

impl IndexMut<isize> for V {
    fn index_mut(&mut self, index: isize) -> &mut Self::Output {
        let slot = self.slot(index);
        &mut self.v[slot]
    }
}
|
||||
|
||||
/// Upper bound used for the number of search rounds and for sizing `V`:
/// half of the combined length, rounded up, plus one.
fn max_d(len1: usize, len2: usize) -> usize {
    // XXX look into reducing the need to have the additional '+ 1'
    let half_rounded_up = (len1 + len2 + 1) / 2;
    half_rounded_up + 1
}
|
||||
|
||||
/// Splits `range` at `at` into `range.start..at` and `at..range.end`.
#[inline(always)]
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
    let Range { start, end } = range;
    let head = start..at;
    let tail = at..end;
    (head, tail)
}
|
||||
|
||||
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
|
||||
/// a snake has a start end end point (and it is possible for a snake to have
|
||||
/// a length of zero, meaning the start and end points are the same) however
|
||||
/// we do not need the end point which is why it's not implemented here.
|
||||
///
|
||||
/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
|
||||
/// some of which may be empty. The divide step requires finding the ceil(D/2) +
|
||||
/// 1 or middle snake of an optimal D-path. The idea for doing so is to
|
||||
/// simultaneously run the basic algorithm in both the forward and reverse
|
||||
/// directions until furthest reaching forward and reverse paths starting at
|
||||
/// opposing corners 'overlap'.
|
||||
fn find_middle_snake(
|
||||
old: &[Token],
|
||||
old_range: Range<usize>,
|
||||
new: &[Token],
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
deadline: Option<Instant>,
|
||||
) -> Option<(usize, usize)> {
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||
// `delta` is odd or even.
|
||||
let delta = n as isize - m as isize;
|
||||
let odd = delta & 1 == 1;
|
||||
|
||||
// The initial point at (0, -1)
|
||||
vf[1] = 0;
|
||||
// The initial point at (N, M+1)
|
||||
vb[1] = 0;
|
||||
|
||||
// We only need to explore ceil(D/2) + 1
|
||||
let d_max = max_d(n, m);
|
||||
assert!(vf.len() >= d_max);
|
||||
assert!(vb.len() >= d_max);
|
||||
|
||||
for d in 0..d_max as isize {
|
||||
// are we running for too long?
|
||||
if let Some(deadline) = deadline {
|
||||
if Instant::now() > deadline {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Forward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||
vf[k + 1]
|
||||
} else {
|
||||
vf[k - 1] + 1
|
||||
};
|
||||
let y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
let (x0, y0) = (x, y);
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if x < old_range.len() && y < new_range.len() {
|
||||
let advance = common_prefix_len(
|
||||
old,
|
||||
old_range.start + x..old_range.end,
|
||||
new,
|
||||
new_range.start + y..new_range.end,
|
||||
);
|
||||
x += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vf[k] = x;
|
||||
|
||||
// Only check for connections from the forward search when N - M is
|
||||
// odd and when there is a reciprocal k line coming from the other
|
||||
// direction.
|
||||
if odd && (k - delta).abs() <= (d - 1) {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vf[k] + vb[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((x0 + old_range.start, y0 + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
|
||||
vb[k + 1]
|
||||
} else {
|
||||
vb[k - 1] + 1
|
||||
};
|
||||
let mut y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
if x < n && y < m {
|
||||
let advance = common_suffix_len(
|
||||
old,
|
||||
old_range.start..old_range.start + n - x,
|
||||
new,
|
||||
new_range.start..new_range.start + m - y,
|
||||
);
|
||||
x += advance;
|
||||
y += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vb[k] = x;
|
||||
|
||||
if !odd && (k - delta).abs() <= d {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vb[k] + vf[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((n - x + old_range.start, m - y + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe there's an opportunity to optimize and bail early?
|
||||
}
|
||||
|
||||
// deadline reached
|
||||
None
|
||||
}
|
||||
|
||||
fn conquer(
|
||||
old: &[Token],
|
||||
mut old_range: Range<usize>,
|
||||
new: &[Token],
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
result: &mut Vec<RawOperation>,
|
||||
deadline: Option<Instant>,
|
||||
) {
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
result.push(RawOperation::Equal(
|
||||
old[old_range.start..old_range.start + common_prefix_len].to_vec(),
|
||||
));
|
||||
}
|
||||
old_range.start += common_prefix_len;
|
||||
new_range.start += common_prefix_len;
|
||||
|
||||
// Check for common suffix
|
||||
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let common_suffix = (
|
||||
old_range.end - common_suffix_len,
|
||||
new_range.end - common_suffix_len,
|
||||
);
|
||||
old_range.end -= common_suffix_len;
|
||||
new_range.end -= common_suffix_len;
|
||||
|
||||
if old_range.is_empty() && new_range.is_empty() {
|
||||
// Do nothing
|
||||
} else if new_range.is_empty() {
|
||||
result.push(RawOperation::Delete(
|
||||
old[old_range.start..old_range.start + old_range.len()].to_vec(),
|
||||
));
|
||||
} else if old_range.is_empty() {
|
||||
result.push(RawOperation::Insert(
|
||||
new[new_range.start..new_range.start + new_range.len()].to_vec(),
|
||||
));
|
||||
} else if let Some((x_start, y_start)) = find_middle_snake(
|
||||
old,
|
||||
old_range.clone(),
|
||||
new,
|
||||
new_range.clone(),
|
||||
vf,
|
||||
vb,
|
||||
deadline,
|
||||
) {
|
||||
let (old_a, old_b) = split_at(old_range, x_start);
|
||||
let (new_a, new_b) = split_at(new_range, y_start);
|
||||
conquer(old, old_a, new, new_a, vf, vb, result, deadline);
|
||||
conquer(old, old_b, new, new_b, vf, vb, result, deadline);
|
||||
} else {
|
||||
result.push(RawOperation::Delete(
|
||||
old[old_range.start..old_range.end].to_vec(),
|
||||
));
|
||||
result.push(RawOperation::Insert(
|
||||
new[new_range.start..new_range.end].to_vec(),
|
||||
));
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
result.push(RawOperation::Equal(
|
||||
old[common_suffix.0..common_suffix.0 + common_suffix_len].to_vec(),
|
||||
));
|
||||
}
|
||||
}
|
||||
47
backend/reconcile/src/diffs/raw_operation.rs
Normal file
47
backend/reconcile/src/diffs/raw_operation.rs
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
use crate::tokenizer::token::Token;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation {
|
||||
Insert(Vec<Token>),
|
||||
Delete(Vec<Token>),
|
||||
Equal(Vec<Token>),
|
||||
}
|
||||
|
||||
impl RawOperation {
|
||||
pub fn tokens(&self) -> &Vec<Token> {
|
||||
match self {
|
||||
RawOperation::Insert(tokens) => tokens,
|
||||
RawOperation::Delete(tokens) => tokens,
|
||||
RawOperation::Equal(tokens) => tokens,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn original_text_length(&self) -> usize {
|
||||
self.tokens()
|
||||
.iter()
|
||||
.map(|t| t.original.chars().count())
|
||||
.sum()
|
||||
}
|
||||
|
||||
pub fn get_original_text(self) -> String {
|
||||
self.tokens().iter().map(|t| t.original.clone()).collect()
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation if returning the new operation.
|
||||
/// Only operations of the same type can be used to extend. If the operations are of different
|
||||
/// types, returns None.
|
||||
pub fn extend(&self, other: &RawOperation) -> Option<RawOperation> {
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => Some(
|
||||
RawOperation::Insert(tokens1.iter().chain(tokens2.iter()).cloned().collect()),
|
||||
),
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => Some(
|
||||
RawOperation::Delete(tokens1.iter().chain(tokens2.iter()).cloned().collect()),
|
||||
),
|
||||
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => Some(
|
||||
RawOperation::Equal(tokens1.iter().chain(tokens2.iter()).cloned().collect()),
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
86
backend/reconcile/src/diffs/utils.rs
Normal file
86
backend/reconcile/src/diffs/utils.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
use std::ops::{Index, Range};
|
||||
|
||||
/// Counts how many leading positions of `new[new_range]` and `old[old_range]`
/// hold equal elements, stopping at the first mismatch or when either range
/// runs out.
/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs
pub fn common_prefix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut matched = 0;
    for (new_idx, old_idx) in new_range.zip(old_range) {
        if new[new_idx] == old[old_idx] {
            matched += 1;
        } else {
            break;
        }
    }
    matched
}
|
||||
|
||||
/// Counts how many trailing positions of `new[new_range]` and `old[old_range]`
/// hold equal elements, walking both ranges from their ends and stopping at the
/// first mismatch or when either range runs out.
/// Copied from https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs
pub fn common_suffix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut matched = 0;
    for (new_idx, old_idx) in new_range.rev().zip(old_range.rev()) {
        if new[new_idx] == old[old_idx] {
            matched += 1;
        } else {
            break;
        }
    }
    matched
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Each case is `(old, old_range, new, new_range, expected_length)`.
    #[test]
    fn test_common_prefix_len() {
        let cases = [
            ("", 0..0, "", 0..0, 0),
            ("foobarbaz", 0..9, "foobarblah", 0..10, 7),
            ("foobarbaz", 0..9, "blablabla", 0..9, 0),
            ("foobarbaz", 3..9, "foobarblah", 3..10, 4),
        ];

        for (old, old_range, new, new_range, expected) in cases {
            assert_eq!(
                common_prefix_len(old.as_bytes(), old_range, new.as_bytes(), new_range),
                expected
            );
        }
    }

    /// Each case is `(old, old_range, new, new_range, expected_length)`.
    #[test]
    fn test_common_suffix_len() {
        let cases = [
            ("", 0..0, "", 0..0, 0),
            ("1234", 0..4, "X0001234", 0..8, 4),
            ("1234", 0..4, "Xxxx", 0..4, 0),
            ("1234", 2..4, "01234", 2..5, 2),
        ];

        for (old, old_range, new, new_range, expected) in cases {
            assert_eq!(
                common_suffix_len(old.as_bytes(), old_range, new.as_bytes(), new_range),
                expected
            );
        }
    }
}
|
||||
10
backend/reconcile/src/errors.rs
Normal file
10
backend/reconcile/src/errors.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SyncLibError {
|
||||
#[error("Failed to shift the operation's index {0}")]
|
||||
NegativeOperationIndexError(String),
|
||||
|
||||
#[error("Failed to apply operation because {0}")]
|
||||
OperationApplicationError(String),
|
||||
}
|
||||
4
backend/reconcile/src/lib.rs
Normal file
4
backend/reconcile/src/lib.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
mod diffs;
|
||||
pub mod errors;
|
||||
pub mod operations;
|
||||
mod tokenizer;
|
||||
25
backend/reconcile/src/operations/mod.rs
Normal file
25
backend/reconcile/src/operations/mod.rs
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
mod operation;
|
||||
mod operation_sequence;
|
||||
|
||||
pub use operation::Operation;
|
||||
pub use operation_sequence::OperationSequence;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
#[test]
|
||||
fn test_merge() {
|
||||
// NOTE(review): placeholder test. The whole body below is commented out, so
// this test currently passes without checking anything. Presumably it is meant
// to be re-enabled once a merge entry point exists; confirm before relying on it.
// let mut original = Rope::from_str("hello world!");
|
||||
// let edit_1 = "hi, world";
|
||||
// let edit_2 = "hello, my friend!";
|
||||
|
||||
// let mut operations_1 = calculate_operations(&original.to_string(), edit_1, 1.0).unwrap();
|
||||
// let mut operations_2 = calculate_operations(&original.to_string(), edit_2, 1.0).unwrap();
|
||||
|
||||
// let result =
|
||||
// merge_and_apply_operations(&mut original, &mut operations_1, &mut operations_2)
|
||||
// .unwrap();
|
||||
|
||||
// assert_eq!(result, "hey, my friend!");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,13 +1,16 @@
|
|||
use ropey::Rope;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Display;
|
||||
|
||||
use crate::errors::SyncLibError;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Represents a change that can be applied to a text document.
|
||||
/// Operation is tied to a ropey::Rope and is mainly expected to be
|
||||
/// created by OperationSequence.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Operation {
|
||||
Insert {
|
||||
index: usize,
|
||||
|
|
@ -17,57 +20,88 @@ pub enum Operation {
|
|||
Delete {
|
||||
index: usize,
|
||||
deleted_character_count: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
/// Creates an insert operation with the given index and text.
|
||||
/// If the text is empty (meaning that the operation would be a no-op), returns None.
|
||||
pub fn create_insert(index: usize, text: &str) -> Result<Option<Self>, SyncLibError> {
|
||||
pub fn create_insert(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return Ok(None);
|
||||
return None;
|
||||
}
|
||||
|
||||
Ok(Some(Operation::Insert {
|
||||
index,
|
||||
text: text.to_string(),
|
||||
}))
|
||||
Some(Operation::Insert { index, text })
|
||||
}
|
||||
|
||||
/// Creates a delete operation with the given index and number of to-be-deleted characters.
|
||||
/// If the operation would delete 0 (meaning that the operation would be a no-op), returns None.
|
||||
pub fn create_delete(
|
||||
index: usize,
|
||||
deleted_character_count: usize,
|
||||
) -> Result<Option<Self>, SyncLibError> {
|
||||
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
|
||||
if deleted_character_count == 0 {
|
||||
return Ok(None);
|
||||
return None;
|
||||
}
|
||||
|
||||
Ok(Some(Operation::Delete {
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
}))
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count: text.chars().count(),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Some(text),
|
||||
})
|
||||
}
|
||||
|
||||
/// Tries to apply the operation to the given ropey::Rope text, returning the modified text.
|
||||
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
|
||||
let index: usize = self.start_index();
|
||||
match self {
|
||||
Operation::Insert { text, .. } => rope_text.try_insert(index, text).map_err(|err| {
|
||||
SyncLibError::OperationApplicationError(format!("Failed to insert text: {}", err))
|
||||
}),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => rope_text
|
||||
.try_remove(index..index + { *deleted_character_count })
|
||||
Operation::Insert { text, .. } => rope_text
|
||||
.try_insert(self.start_index(), text)
|
||||
.map_err(|err| {
|
||||
SyncLibError::OperationApplicationError(format!(
|
||||
"Failed to insert text: {}",
|
||||
err
|
||||
))
|
||||
}),
|
||||
Operation::Delete {
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => {
|
||||
debug_assert!(
|
||||
rope_text.get_slice(self.range()).is_some(),
|
||||
"Failed to get slice of text to delete"
|
||||
);
|
||||
|
||||
if let Some(text) = deleted_text {
|
||||
debug_assert_eq!(
|
||||
rope_text.get_slice(self.range()).unwrap().to_string(),
|
||||
*text
|
||||
);
|
||||
}
|
||||
|
||||
rope_text.try_remove(self.range()).map_err(|err| {
|
||||
SyncLibError::OperationApplicationError(format!(
|
||||
"Failed to remove text: {}",
|
||||
err
|
||||
))
|
||||
}),
|
||||
})
|
||||
}
|
||||
}?;
|
||||
|
||||
Ok(rope_text)
|
||||
|
|
@ -104,33 +138,40 @@ impl Operation {
|
|||
}
|
||||
|
||||
/// Clones the operation while updating the index.
|
||||
pub fn with_index(&self, index: usize) -> Result<Self, SyncLibError> {
|
||||
Ok(match self {
|
||||
pub fn with_index(&self, index: usize) -> Self {
|
||||
match self {
|
||||
Operation::Insert { text, .. } => Operation::Insert {
|
||||
index,
|
||||
text: text.clone(),
|
||||
},
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => Operation::Delete {
|
||||
index,
|
||||
deleted_character_count: *deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: deleted_text.clone(),
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Clones the operation while shifting the index by the given offset.
|
||||
/// The offset can be negative but the resulting index must be non-negative.
|
||||
pub fn with_shifted_index(&self, offset: i64) -> Result<Self, SyncLibError> {
|
||||
let index = self.start_index() as i64 + offset;
|
||||
|
||||
self.with_index(index.try_into().map_err(|_| {
|
||||
let non_negative_index = index.try_into().map_err(|_| {
|
||||
SyncLibError::NegativeOperationIndexError(format!(
|
||||
"Index {} is negative but operations must have a non-negative index",
|
||||
index
|
||||
))
|
||||
})?)
|
||||
})?;
|
||||
|
||||
Ok(self.with_index(non_negative_index))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -138,17 +179,29 @@ impl Display for Operation {
|
|||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Operation::Insert { index, text } => {
|
||||
write!(f, "Insert '{}' from index {}", text, index)
|
||||
write!(f, "<insert '{}' from index {}>", text, index)
|
||||
}
|
||||
Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
} => {
|
||||
write!(
|
||||
f,
|
||||
"Delete {} characters index {}",
|
||||
deleted_character_count, index
|
||||
)
|
||||
if cfg!(debug_assertions) {
|
||||
write!(
|
||||
f,
|
||||
"<delete '{}' from index {}>",
|
||||
deleted_text.as_ref().unwrap_or(&"<unknown>".to_string()),
|
||||
index
|
||||
)
|
||||
} else {
|
||||
write!(
|
||||
f,
|
||||
"<delete {} characters () from index {}>",
|
||||
deleted_character_count, index
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -161,31 +214,15 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_shifting_error() {
|
||||
insta::assert_debug_snapshot!(Operation::create_insert(1, "hi")
|
||||
.unwrap()
|
||||
insta::assert_debug_snapshot!(Operation::create_insert(1, "hi".to_string())
|
||||
.unwrap()
|
||||
.with_shifted_index(-2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_delete() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello world");
|
||||
let operation = Operation::Delete {
|
||||
index: 5,
|
||||
deleted_character_count: 6,
|
||||
};
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
assert_eq!(rope.to_string(), "hello");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_delete_with_create() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello world");
|
||||
let operation = Operation::create_delete(5, 6)?.unwrap();
|
||||
let operation = Operation::create_delete_with_text(5, "world ".to_string()).unwrap();
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
|
|
@ -197,22 +234,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_apply_insert() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello");
|
||||
let operation = Operation::Insert {
|
||||
index: 5,
|
||||
text: " my friend".to_string(),
|
||||
};
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
assert_eq!(rope.to_string(), "hello my friend");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_apply_insert_with_create() -> Result<(), SyncLibError> {
|
||||
let mut rope = Rope::from_str("hello");
|
||||
let operation = Operation::create_insert(5, " my friend")?.unwrap();
|
||||
let operation = Operation::create_insert(5, " my friend".to_string()).unwrap();
|
||||
|
||||
operation.apply(&mut rope)?;
|
||||
|
||||
|
|
@ -1,81 +1,90 @@
|
|||
use std::cmp::Ordering;
|
||||
|
||||
use super::Operation;
|
||||
use crate::diffs::myers::diff;
|
||||
use crate::diffs::raw_operation::RawOperation;
|
||||
use crate::errors::SyncLibError;
|
||||
use crate::tokenizer::token::Token;
|
||||
use ropey::Rope;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use similar::Algorithm;
|
||||
use similar::{utils::TextDiffRemapper, ChangeTag, TextDiff};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
struct MergeContext {
|
||||
previous_delete: Option<Operation>,
|
||||
last_delete: Option<Operation>,
|
||||
shift: i64,
|
||||
}
|
||||
|
||||
/// Splits `text` into borrowed tokens, cutting after every whitespace
/// character so that each separator stays attached to the end of the token
/// that precedes it. Concatenating the tokens yields `text` again.
pub fn tokenize(text: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    tokens.extend(text.split_inclusive(char::is_whitespace));
    tokens
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
|
||||
/// A sequence of operations that can be applied to a text document.
|
||||
/// OperationSequence supports merging two sequences of operations using the
|
||||
/// principle of Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the from_strings method, then merged with another
|
||||
/// OperationSequence derived from the same original text and then applied to the original text
|
||||
/// to get the reconciled text of concurrent edits.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
|
||||
pub struct OperationSequence {
|
||||
operations: Vec<Operation>,
|
||||
}
|
||||
|
||||
impl OperationSequence {
|
||||
/// Creates a new OperationSequence with the given operations.
|
||||
/// The operations should be in the order they should be applied.
|
||||
/// The operations must not overlap.
|
||||
pub fn new(operations: Vec<Operation>) -> Self {
|
||||
operations
|
||||
.iter()
|
||||
.zip(operations.iter().skip(1))
|
||||
.for_each(|(previous, next)| {
|
||||
debug_assert!(
|
||||
previous.start_index() <= next.start_index(),
|
||||
"{} doesn't come before {}",
|
||||
previous,
|
||||
next
|
||||
);
|
||||
});
|
||||
|
||||
Self { operations }
|
||||
}
|
||||
|
||||
pub fn try_from_string_diff(
|
||||
left: &str,
|
||||
right: &str,
|
||||
diff_ratio_threshold: f32,
|
||||
) -> Result<Self, SyncLibError> {
|
||||
let left_tokens = tokenize(left);
|
||||
let right_tokens = tokenize(right);
|
||||
/// Creates an OperationSequence from the given original (old) and updated (new) strings.
|
||||
/// The returned OperationSequence represents the changes from the original to the updated text.
|
||||
/// When the return value is applied to the original text, it will result in the updated text.
|
||||
pub fn from_strings(original: &str, updated: &str) -> Self {
|
||||
let original_tokens = Token::tokenize(original);
|
||||
let updated_tokens = Token::tokenize(updated);
|
||||
|
||||
let diff = TextDiff::configure()
|
||||
.algorithm(Algorithm::Patience)
|
||||
.diff_slices(&left_tokens, &right_tokens);
|
||||
let diff: Vec<RawOperation> = diff(&original_tokens, &updated_tokens);
|
||||
|
||||
let diff_ratio = 1.0 - diff.ratio();
|
||||
if diff_ratio > diff_ratio_threshold {
|
||||
return Err(SyncLibError::DiffTooLarge {
|
||||
diff_ratio,
|
||||
diff_ratio_limit: diff_ratio_threshold,
|
||||
});
|
||||
}
|
||||
|
||||
let remapper = TextDiffRemapper::from_text_diff(&diff, left, right);
|
||||
|
||||
let mut index = 0;
|
||||
diff.ops()
|
||||
.iter()
|
||||
.flat_map(move |x| remapper.iter_slices(x))
|
||||
.map(|(tag, text)| match tag {
|
||||
ChangeTag::Equal => {
|
||||
index += text.chars().count();
|
||||
Ok(None)
|
||||
}
|
||||
ChangeTag::Insert => {
|
||||
let result = Operation::create_insert(index, text);
|
||||
index += text.chars().count();
|
||||
result
|
||||
}
|
||||
ChangeTag::Delete => Operation::create_delete(index, text.chars().count()),
|
||||
})
|
||||
.flat_map(|result| result.transpose().into_iter())
|
||||
.collect::<Result<Vec<_>, SyncLibError>>()
|
||||
.map(Self::new)
|
||||
Self::new(Self::raw_operations_to_operations(diff))
|
||||
}
|
||||
|
||||
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
|
||||
for operation in &self.operations {
|
||||
operation.apply(rope_text)?;
|
||||
}
|
||||
|
||||
Ok(rope_text)
|
||||
fn raw_operations_to_operations(raw_operations: Vec<RawOperation>) -> Vec<Operation> {
|
||||
let mut index = 0;
|
||||
raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|raw_operation| {
|
||||
match raw_operation {
|
||||
RawOperation::Equal(..) => {
|
||||
index += raw_operation.original_text_length();
|
||||
None
|
||||
}
|
||||
RawOperation::Insert(..) => {
|
||||
let length = raw_operation.original_text_length();
|
||||
let result =
|
||||
Operation::create_insert(index, raw_operation.get_original_text());
|
||||
index += length;
|
||||
result
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
Operation::create_delete_with_text(index, raw_operation.get_original_text())
|
||||
}
|
||||
}
|
||||
.into_iter()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn merge(&self, other: &Self) -> Result<Self, SyncLibError> {
|
||||
|
|
@ -113,14 +122,12 @@ impl OperationSequence {
|
|||
})
|
||||
.transpose()?;
|
||||
|
||||
println!();
|
||||
|
||||
let left_op_index = shifted_left_op
|
||||
.as_ref()
|
||||
.map(|op| {
|
||||
op.start_index().max(
|
||||
left_merge_context
|
||||
.previous_delete
|
||||
.last_delete
|
||||
.as_ref()
|
||||
.map(|op| op.end_index())
|
||||
.unwrap_or_default(),
|
||||
|
|
@ -133,7 +140,7 @@ impl OperationSequence {
|
|||
.map(|op| {
|
||||
op.start_index().max(
|
||||
right_merge_context
|
||||
.previous_delete
|
||||
.last_delete
|
||||
.as_ref()
|
||||
.map(|op| op.end_index())
|
||||
.unwrap_or_default(),
|
||||
|
|
@ -141,16 +148,6 @@ impl OperationSequence {
|
|||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
println!(
|
||||
"{:#?} (idx {}) <> {:#?} (idx {})",
|
||||
shifted_left_op.clone(),
|
||||
left_op_index,
|
||||
shifted_right_op.clone(),
|
||||
right_op_index
|
||||
);
|
||||
|
||||
println!("{:?} <> {:?}", left_merge_context, right_merge_context);
|
||||
|
||||
let result = left_op_index.cmp(&right_op_index);
|
||||
let order = if result == Ordering::Equal
|
||||
&& shifted_left_op.is_some()
|
||||
|
|
@ -171,8 +168,6 @@ impl OperationSequence {
|
|||
match (shifted_left_op, shifted_right_op, order) {
|
||||
(Some(left_op), None, _)
|
||||
| (Some(left_op), Some(_), std::cmp::Ordering::Less | std::cmp::Ordering::Equal) => {
|
||||
println!("Left op: {:?}", left_op);
|
||||
|
||||
if let Some(op) = Self::merge_operations_with_context(
|
||||
left_op,
|
||||
&mut right_merge_context,
|
||||
|
|
@ -185,8 +180,6 @@ impl OperationSequence {
|
|||
}
|
||||
(None, Some(right_op), _)
|
||||
| (Some(_), Some(right_op), std::cmp::Ordering::Greater) => {
|
||||
println!("Right op: {:?}", right_op);
|
||||
|
||||
if let Some(op) = Self::merge_operations_with_context(
|
||||
right_op,
|
||||
&mut left_merge_context,
|
||||
|
|
@ -201,21 +194,26 @@ impl OperationSequence {
|
|||
break;
|
||||
}
|
||||
};
|
||||
|
||||
println!("last {:?}", merged_operations.last().unwrap());
|
||||
println!("{:?} <> {:?}", left_merge_context, right_merge_context);
|
||||
}
|
||||
|
||||
Ok(Self::new(merged_operations))
|
||||
}
|
||||
|
||||
/// Applies every operation of this sequence, in order, to `rope_text`.
///
/// Stops at the first operation that fails and returns its error; the
/// operations applied before the failure are not rolled back. On success
/// the same mutable rope reference is handed back to the caller.
pub fn apply<'a>(&self, rope_text: &'a mut Rope) -> Result<&'a mut Rope, SyncLibError> {
    self.operations
        .iter()
        .try_for_each(|operation| operation.apply(rope_text).map(|_| ()))?;

    Ok(rope_text)
}
|
||||
|
||||
fn merge_operations_with_context(
|
||||
aligned_operation: Operation,
|
||||
affecting_context: &mut MergeContext,
|
||||
produced_context: &mut MergeContext,
|
||||
) -> Result<Option<Operation>, SyncLibError> {
|
||||
Ok(
|
||||
match (aligned_operation, affecting_context.previous_delete.clone()) {
|
||||
match (aligned_operation, affecting_context.last_delete.clone()) {
|
||||
(operation @ Operation::Insert { .. }, None) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
Some(operation)
|
||||
|
|
@ -229,17 +227,16 @@ impl OperationSequence {
|
|||
Some(operation)
|
||||
}
|
||||
|
||||
(operation @ Operation::Insert { .. }, Some(previous_delete)) => {
|
||||
(operation @ Operation::Insert { .. }, Some(last_delete)) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
|
||||
if previous_delete.range().contains(&operation.start_index()) {
|
||||
let moved_operation =
|
||||
operation.with_index(previous_delete.start_index())?;
|
||||
if last_delete.range().contains(&operation.start_index()) {
|
||||
let moved_operation = operation.with_index(last_delete.start_index());
|
||||
|
||||
affecting_context.previous_delete = Operation::create_delete(
|
||||
affecting_context.last_delete = Operation::create_delete(
|
||||
moved_operation.end_index() + 1,
|
||||
previous_delete.len(),
|
||||
)?;
|
||||
last_delete.len(),
|
||||
);
|
||||
|
||||
Some(moved_operation)
|
||||
} else {
|
||||
|
|
@ -247,27 +244,24 @@ impl OperationSequence {
|
|||
}
|
||||
}
|
||||
|
||||
(operation @ Operation::Delete { .. }, Some(previous_delete)) => {
|
||||
let updated_delete = if previous_delete
|
||||
.range()
|
||||
.contains(&operation.start_index())
|
||||
{
|
||||
(operation @ Operation::Delete { .. }, Some(last_delete)) => {
|
||||
let updated_delete = if last_delete.range().contains(&operation.start_index()) {
|
||||
let overlap =
|
||||
previous_delete.end_index() as i64 - operation.start_index() as i64 + 1;
|
||||
last_delete.end_index() as i64 - operation.start_index() as i64 + 1;
|
||||
|
||||
affecting_context.previous_delete = Operation::create_delete(
|
||||
previous_delete.start_index(),
|
||||
0.max(previous_delete.len() as i64 - operation.len() as i64) as usize,
|
||||
)?;
|
||||
affecting_context.last_delete = Operation::create_delete(
|
||||
last_delete.start_index(),
|
||||
0.max(last_delete.len() as i64 - operation.len() as i64) as usize,
|
||||
);
|
||||
|
||||
if previous_delete.end_index() < operation.end_index() {
|
||||
affecting_context.shift -= previous_delete.len() as i64 - overlap
|
||||
if last_delete.end_index() < operation.end_index() {
|
||||
affecting_context.shift -= last_delete.len() as i64 - overlap
|
||||
}
|
||||
|
||||
Operation::create_delete(
|
||||
previous_delete.start_index(),
|
||||
last_delete.start_index(),
|
||||
0.max(operation.len() as i64 - overlap) as usize,
|
||||
)?
|
||||
)
|
||||
} else {
|
||||
Some(operation)
|
||||
};
|
||||
|
|
@ -286,24 +280,24 @@ impl OperationSequence {
|
|||
produced_context: &mut MergeContext,
|
||||
delete: Option<Operation>,
|
||||
) {
|
||||
if let Some(produced_previous_delete) = produced_context.previous_delete.take() {
|
||||
produced_context.shift -= produced_previous_delete.len() as i64;
|
||||
if let Some(produced_last_delete) = produced_context.last_delete.take() {
|
||||
produced_context.shift -= produced_last_delete.len() as i64;
|
||||
}
|
||||
|
||||
produced_context.previous_delete = delete;
|
||||
produced_context.last_delete = delete;
|
||||
}
|
||||
|
||||
fn pick_up_dangling_delete_from_affecting_context(
|
||||
next_operation: &Operation,
|
||||
affecting_context: &mut MergeContext,
|
||||
) {
|
||||
match affecting_context.previous_delete.as_ref() {
|
||||
Some(previous_delete)
|
||||
match affecting_context.last_delete.as_ref() {
|
||||
Some(last_delete)
|
||||
if next_operation.start_index() as i64 + affecting_context.shift
|
||||
> previous_delete.end_index() as i64 =>
|
||||
> last_delete.end_index() as i64 =>
|
||||
{
|
||||
affecting_context.shift -= previous_delete.len() as i64;
|
||||
affecting_context.previous_delete = None;
|
||||
affecting_context.shift -= last_delete.len() as i64;
|
||||
affecting_context.last_delete = None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
@ -320,20 +314,18 @@ mod tests {
|
|||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_calculate_operations() -> Result<(), SyncLibError> {
|
||||
fn test_calculate_operations() {
|
||||
let left = "hello world! How are you? Adam";
|
||||
let right = "Hello, my friend! How are you doing? Albert";
|
||||
|
||||
let operations = OperationSequence::try_from_string_diff(left, right, 0.8)?;
|
||||
let operations = OperationSequence::from_strings(left, right);
|
||||
|
||||
insta::assert_debug_snapshot!(operations);
|
||||
|
||||
let mut left = Rope::from_str(left);
|
||||
let new_right = operations.apply(&mut left)?;
|
||||
let new_right = operations.apply(&mut left).unwrap();
|
||||
|
||||
assert_eq!(new_right.to_string(), right);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -341,26 +333,24 @@ mod tests {
|
|||
let left = "hello world! How are you? Adam";
|
||||
let right = "Hello, my friend! How are you doing? Albert";
|
||||
|
||||
let result = OperationSequence::try_from_string_diff(left, right, 0.1);
|
||||
let result = OperationSequence::from_strings(left, right);
|
||||
|
||||
insta::assert_debug_snapshot!(result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_calculate_operations_with_no_diff() -> Result<(), SyncLibError> {
|
||||
fn test_calculate_operations_with_no_diff() {
|
||||
let left = "hello world!";
|
||||
let right = "hello world!";
|
||||
|
||||
let operations = OperationSequence::try_from_string_diff(left, right, 0.0)?;
|
||||
let operations = OperationSequence::from_strings(left, right);
|
||||
|
||||
assert_eq!(operations.operations.len(), 0);
|
||||
|
||||
let mut left = Rope::from_str(left);
|
||||
let new_right = operations.apply(&mut left)?;
|
||||
let new_right = operations.apply(&mut left).unwrap();
|
||||
|
||||
assert_eq!(new_right.to_string(), right);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -427,7 +417,7 @@ mod tests {
|
|||
"hi, my friend!",
|
||||
);
|
||||
|
||||
test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !");
|
||||
// test_merge_both_ways("hello world", "world !", "hi hello world", "hi world !");
|
||||
|
||||
test_merge_both_ways(
|
||||
"both delete the same word",
|
||||
|
|
@ -436,6 +426,8 @@ mod tests {
|
|||
"both the same word",
|
||||
);
|
||||
|
||||
test_merge_both_ways(" ", "it’s utf-8!", " ", "it’s utf-8!");
|
||||
|
||||
test_merge_both_ways(
|
||||
"both delete the same word but one a bit more",
|
||||
"both the same word",
|
||||
|
|
@ -464,7 +456,7 @@ mod tests {
|
|||
let contents = files
|
||||
.into_iter()
|
||||
.map(|name| fs::read_to_string(root.join(name)).unwrap())
|
||||
.map(|text| text[0..50000].to_string())
|
||||
.map(|text| text[..15000].to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
contents
|
||||
|
|
@ -482,21 +474,48 @@ mod tests {
|
|||
}
|
||||
|
||||
fn test_merge(original: &str, edit_1: &str, edit_2: &str) -> String {
|
||||
// println!("Original: {}", original);
|
||||
println!(
|
||||
"original: '{:#}'",
|
||||
original[..100.min(original.len())].to_string()
|
||||
);
|
||||
println!(
|
||||
"edit_1: '{:#}'",
|
||||
edit_1[..100.min(edit_1.len())].to_string()
|
||||
);
|
||||
println!(
|
||||
"edit_2: '{:#}'",
|
||||
edit_2[..100.min(edit_2.len())].to_string()
|
||||
);
|
||||
|
||||
let mut original = Rope::from_str(original);
|
||||
|
||||
let operations_1 =
|
||||
OperationSequence::try_from_string_diff(&original.to_string(), edit_1, 1.0).unwrap();
|
||||
let operations_2 =
|
||||
OperationSequence::try_from_string_diff(&original.to_string(), edit_2, 1.0).unwrap();
|
||||
// println!("Operations 1: {:?}", operations_1);
|
||||
// println!("Operations 2: {:?}", operations_2);
|
||||
let operations_1 = OperationSequence::from_strings(&original.to_string(), edit_1);
|
||||
println!(
|
||||
"operations_1: {:?}",
|
||||
operations_1.operations[..20.min(operations_1.operations.len())].to_vec()
|
||||
);
|
||||
let operations_2 = OperationSequence::from_strings(&original.to_string(), edit_2);
|
||||
println!(
|
||||
"operations_2: {:?}",
|
||||
operations_2.operations[..20.min(operations_2.operations.len())].to_vec()
|
||||
);
|
||||
|
||||
assert_eq!(operations_1.apply(&mut original.clone()).unwrap(), edit_1);
|
||||
assert_eq!(operations_2.apply(&mut original.clone()).unwrap(), edit_2);
|
||||
assert_eq!(
|
||||
operations_1
|
||||
.apply(&mut original.clone())
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
edit_1
|
||||
);
|
||||
assert_eq!(
|
||||
operations_2
|
||||
.apply(&mut original.clone())
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
edit_2
|
||||
);
|
||||
|
||||
let merged = operations_1.merge(&operations_2).unwrap();
|
||||
// println!("Merged: {:?}", merged);
|
||||
|
||||
let result = merged.apply(&mut original).unwrap();
|
||||
result.to_string()
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: sync_lib/src/operations/operation.rs
|
||||
expression: "Operation::create_insert(1, \"hi\").unwrap().unwrap().with_shifted_index(-2)"
|
||||
source: reconcile/src/operations/operation.rs
|
||||
expression: "Operation::create_insert(1, \"hi\".to_string()).unwrap().with_shifted_index(-2)"
|
||||
snapshot_kind: text
|
||||
---
|
||||
Err(
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
source: reconcile/src/operations/operation_sequence.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
OperationSequence {
|
||||
operations: [
|
||||
Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
source: reconcile/src/operations/operation_sequence.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
OperationSequence {
|
||||
operations: [
|
||||
Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
],
|
||||
}
|
||||
1
backend/reconcile/src/tokenizer/mod.rs
Normal file
1
backend/reconcile/src/tokenizer/mod.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub mod token;
|
||||
26
backend/reconcile/src/tokenizer/token.rs
Normal file
26
backend/reconcile/src/tokenizer/token.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/// A unit of text used as the element type of the diff algorithms.
///
/// Two tokens compare equal when their `normalised` forms match; the
/// `original` text is carried along so the exact source text can be
/// recovered, but it takes no part in equality.
#[derive(Debug, Clone)]
pub struct Token {
    /// Form of the token used for comparison.
    pub normalised: String,
    /// Text of the token exactly as it appeared in the input.
    pub original: String,
}

impl Token {
    /// Creates a token from its comparison form and its original text.
    pub fn new(normalised: String, original: String) -> Self {
        Self {
            normalised,
            original,
        }
    }

    /// Splits `text` into tokens, cutting after every whitespace character so
    /// that each separator stays attached to the end of the preceding token.
    ///
    /// No normalisation is applied yet: both `normalised` and `original` hold
    /// the same slice of the input. Empty input produces no tokens.
    pub fn tokenize(text: &str) -> Vec<Token> {
        text.split_inclusive(char::is_whitespace)
            .map(|piece| Token::new(piece.to_string(), piece.to_string()))
            .collect()
    }
}

impl PartialEq for Token {
    /// Compares only the `normalised` text; `original` is ignored on purpose.
    fn eq(&self, other: &Self) -> bool {
        self.normalised == other.normalised
    }
}

// String equality is reflexive, symmetric and transitive, so the comparison
// above is a full equivalence relation and `Eq` can be promised as well.
impl Eq for Token {}
|
||||
14910
backend/reconcile/test/resources/pride_and_prejudice.txt
Normal file
14910
backend/reconcile/test/resources/pride_and_prejudice.txt
Normal file
File diff suppressed because it is too large
Load diff
5646
backend/reconcile/test/resources/romeo_and_juliet.txt
Normal file
5646
backend/reconcile/test/resources/romeo_and_juliet.txt
Normal file
File diff suppressed because it is too large
Load diff
9105
backend/reconcile/test/resources/room_with_a_view.txt
Normal file
9105
backend/reconcile/test/resources/room_with_a_view.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,25 +0,0 @@
|
|||
---
|
||||
source: sync_lib/src/operations/operation_sequence.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
OperationSequence {
|
||||
operations: [
|
||||
Delete {
|
||||
index: 0,
|
||||
deleted_character_count: 13,
|
||||
},
|
||||
Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 10,
|
||||
},
|
||||
Insert {
|
||||
index: 26,
|
||||
text: "you doing? Albert",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
---
|
||||
source: sync_lib/src/operations/operation_sequence.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
Err(
|
||||
DiffTooLarge {
|
||||
diff_ratio: 0.73333335,
|
||||
diff_ratio_limit: 0.1,
|
||||
},
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue