Delete reconcile
This commit is contained in:
parent
75b020146a
commit
d9d14e03e9
49 changed files with 0 additions and 34423 deletions
|
|
@ -1,23 +0,0 @@
|
|||
[package]
|
||||
name = "reconcile"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0.219", optional = true, features = ["derive"] }
|
||||
|
||||
[features]
|
||||
serde = [ "dep:serde" ]
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.42.2"
|
||||
pretty_assertions = "1.4.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_yaml ="0.9.34"
|
||||
test-case = "3.3.1"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
pub mod myers;
|
||||
pub mod raw_operation;
|
||||
|
|
@ -1,357 +0,0 @@
|
|||
//! Taken from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/myers.rs>
|
||||
//!
|
||||
//! Myers' diff algorithm.
|
||||
//!
|
||||
//! * time: `O((N+M)D)`
|
||||
//! * space: `O(N+M)`
|
||||
//!
|
||||
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
|
||||
//! describing it.
|
||||
//!
|
||||
//! The implementation of this algorithm is based on the implementation by
|
||||
//! Brandon Williams.
|
||||
//!
|
||||
//! # Heuristics
|
||||
//!
|
||||
//! At present this implementation of Myers' does not implement any more
|
||||
//! advanced heuristics that would solve some pathological cases. For instance
|
||||
//! passing two large and completely distinct sequences to the algorithm will
|
||||
//! make it spin without making reasonable progress.
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
|
||||
use std::{
|
||||
ops::{Index, IndexMut, Range},
|
||||
vec,
|
||||
};
|
||||
|
||||
use super::raw_operation::RawOperation;
|
||||
use crate::{
|
||||
tokenizer::token::Token,
|
||||
utils::{common_prefix_len::common_prefix_len, common_suffix_len::common_suffix_len},
|
||||
};
|
||||
|
||||
/// Myers' diff algorithm with deadline.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range` and `new` between indices
|
||||
/// `new_range`.
|
||||
///
|
||||
/// The returned `RawOperations` all have a token count of 1.
|
||||
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let max_d = (old.len() + new.len()).div_ceil(2) + 1;
|
||||
let mut vb = V::new(max_d);
|
||||
let mut vf = V::new(max_d);
|
||||
let mut result: Vec<RawOperation<T>> = vec![];
|
||||
|
||||
conquer(
|
||||
old,
|
||||
0..old.len(),
|
||||
new,
|
||||
0..new.len(),
|
||||
&mut vf,
|
||||
&mut vb,
|
||||
&mut result,
|
||||
);
|
||||
|
||||
debug_assert!(
|
||||
result.iter().all(|op| op.tokens().len() == 1),
|
||||
"All operations should be of length 1"
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
|
||||
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
|
||||
// and then a possibly empty sequence of diagonal edges called a snake.
|
||||
|
||||
/// Storage for the endpoints of the furthest reaching `D-paths`, keyed by
/// diagonal `k`.
///
/// Only the `x` coordinate of each endpoint is stored; the matching `y` is
/// recovered by the search as `x - k`. Because `k` may be negative, a plain
/// `Vec` cannot be indexed by it directly: every access shifts `k` by
/// `offset` (the `max_d` the buffer was created with) before touching the
/// backing vector.
#[derive(Debug)]
struct V {
    offset: isize,
    v: Vec<usize>, // Look into initializing this to -1 and storing isize
}

impl V {
    /// Creates a zero-filled buffer with `2 * max_d` slots.
    fn new(max_d: usize) -> Self {
        let offset = max_d as isize;
        let v = vec![0; 2 * max_d];
        Self { offset, v }
    }

    /// Number of slots in the backing vector.
    fn len(&self) -> usize {
        self.v.len()
    }

    /// Translates a (possibly negative) diagonal into a position in the
    /// backing vector.
    fn slot(&self, k: isize) -> usize {
        (k + self.offset) as usize
    }
}

impl Index<isize> for V {
    type Output = usize;

    fn index(&self, index: isize) -> &Self::Output {
        let slot = self.slot(index);
        &self.v[slot]
    }
}

impl IndexMut<isize> for V {
    fn index_mut(&mut self, index: isize) -> &mut Self::Output {
        let slot = self.slot(index);
        &mut self.v[slot]
    }
}
|
||||
|
||||
/// Cuts `range` into the part before `at` and the part starting at `at`.
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
    let Range { start, end } = range;
    let head = start..at;
    let tail = at..end;
    (head, tail)
}
|
||||
|
||||
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
|
||||
/// a snake has a start end end point (and it is possible for a snake to have
|
||||
/// a length of zero, meaning the start and end points are the same) however
|
||||
/// we do not need the end point which is why it's not implemented here.
|
||||
///
|
||||
/// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
|
||||
/// some of which may be empty. The divide step requires finding the ceil(D/2) +
|
||||
/// 1 or middle snake of an optimal D-path. The idea for doing so is to
|
||||
/// simultaneously run the basic algorithm in both the forward and reverse
|
||||
/// directions until furthest reaching forward and reverse paths starting at
|
||||
/// opposing corners 'overlap'.
|
||||
fn find_middle_snake<T>(
|
||||
old: &[Token<T>],
|
||||
old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
) -> Option<(usize, usize)>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||
// `delta` is odd or even.
|
||||
let delta = n as isize - m as isize;
|
||||
let odd = delta & 1 == 1;
|
||||
|
||||
// The initial point at (0, -1)
|
||||
vf[1] = 0;
|
||||
// The initial point at (N, M+1)
|
||||
vb[1] = 0;
|
||||
|
||||
let d_max = (n + m).div_ceil(2) + 1;
|
||||
assert!(vf.len() >= d_max);
|
||||
assert!(vb.len() >= d_max);
|
||||
|
||||
for d in 0..d_max as isize {
|
||||
// Forward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||
vf[k + 1]
|
||||
} else {
|
||||
vf[k - 1] + 1
|
||||
};
|
||||
let y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
let (x0, y0) = (x, y);
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if x < old_range.len() && y < new_range.len() {
|
||||
let advance = common_prefix_len(
|
||||
old,
|
||||
old_range.start + x..old_range.end,
|
||||
new,
|
||||
new_range.start + y..new_range.end,
|
||||
);
|
||||
x += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vf[k] = x;
|
||||
|
||||
// Only check for connections from the forward search when N - M is
|
||||
// odd and when there is a reciprocal k line coming from the other
|
||||
// direction.
|
||||
if odd && (k - delta).abs() <= (d - 1) {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vf[k] + vb[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((x0 + old_range.start, y0 + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Backward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
|
||||
vb[k + 1]
|
||||
} else {
|
||||
vb[k - 1] + 1
|
||||
};
|
||||
let mut y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
if x < n && y < m {
|
||||
let advance = common_suffix_len(
|
||||
old,
|
||||
old_range.start..old_range.start + n - x,
|
||||
new,
|
||||
new_range.start..new_range.start + m - y,
|
||||
);
|
||||
x += advance;
|
||||
y += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vb[k] = x;
|
||||
|
||||
if !odd && (k - delta).abs() <= d {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vb[k] + vf[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Some((n - x + old_range.start, m - y + new_range.start));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe there's an opportunity to optimize and bail early?
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn conquer<T>(
|
||||
old: &[Token<T>],
|
||||
mut old_range: Range<usize>,
|
||||
new: &[Token<T>],
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
result: &mut Vec<RawOperation<T>>,
|
||||
) where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + common_prefix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
old_range.start += common_prefix_len;
|
||||
new_range.start += common_prefix_len;
|
||||
|
||||
// Check for common suffix
|
||||
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let common_suffix = (
|
||||
old_range.end - common_suffix_len,
|
||||
new_range.end - common_suffix_len,
|
||||
);
|
||||
old_range.end -= common_suffix_len;
|
||||
new_range.end -= common_suffix_len;
|
||||
|
||||
if old_range.is_empty() && new_range.is_empty() {
|
||||
// do nothing
|
||||
} else if new_range.is_empty() {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.start + old_range.len()]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Delete(vec![token.clone()])),
|
||||
);
|
||||
} else if old_range.is_empty() {
|
||||
result.extend(
|
||||
new[new_range.start..new_range.start + new_range.len()]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Insert(vec![token.clone()])),
|
||||
);
|
||||
} else if let Some((x_start, y_start)) =
|
||||
find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb)
|
||||
{
|
||||
let (old_a, old_b) = split_at(old_range, x_start);
|
||||
let (new_a, new_b) = split_at(new_range, y_start);
|
||||
conquer(old, old_a, new, new_a, vf, vb, result);
|
||||
conquer(old, old_b, new, new_b, vf, vb, result);
|
||||
} else {
|
||||
result.extend(
|
||||
old[old_range.start..old_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Delete(vec![token.clone()])),
|
||||
);
|
||||
result.extend(
|
||||
new[new_range.start..new_range.end]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Insert(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
result.extend(
|
||||
old[common_suffix.0..common_suffix.0 + common_suffix_len]
|
||||
.iter()
|
||||
.map(|token| RawOperation::Equal(vec![token.clone()])),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;

    use super::*;

    /// Builds a token sequence from plain words, one token per word.
    fn tokens(words: &[&str]) -> Vec<Token<String>> {
        words.iter().map(|word| (*word).into()).collect()
    }

    /// Two empty inputs produce no operations at all.
    #[test]
    fn test_empty_diff() {
        let old = tokens(&[]);
        let new = tokens(&[]);
        let result = diff(&old, &new);
        assert_eq!(result.len(), 0);
    }

    /// Diffing a sequence against itself.
    #[test]
    fn test_identical_content() {
        let content = tokens(&["a", "b", "c"]);
        let result = diff(&content, &content);
        assert_debug_snapshot!(result);
    }

    /// Everything in `new` is an addition.
    #[test]
    fn test_insert_only() {
        let old = tokens(&[]);
        let new = tokens(&["a", "b"]);
        let result = diff(&old, &new);
        assert_debug_snapshot!(result);
    }

    /// Everything in `old` is a removal.
    #[test]
    fn test_delete_only() {
        let old = tokens(&["a", "b"]);
        let new = tokens(&[]);
        let result = diff(&old, &new);
        assert_debug_snapshot!(result);
    }

    /// Shared first and last token with a change in between.
    #[test]
    fn test_prefix_and_suffix() {
        let old = tokens(&["a", "b", "c", "d"]);
        let new = tokens(&["a", "x", "d"]);
        let result = diff(&old, &new);
        assert_debug_snapshot!(result);
    }

    /// Several independent changes interleaved with unchanged tokens.
    #[test]
    fn test_complex_diff() {
        let old = tokens(&["a", "b", "c", "d"]);
        let new = tokens(&["a", "x", "c", "y"]);
        let result = diff(&old, &new);
        assert_debug_snapshot!(result);
    }
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
use crate::tokenizer::token::Token;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
Insert(Vec<Token<T>>),
|
||||
Delete(Vec<Token<T>>),
|
||||
Equal(Vec<Token<T>>),
|
||||
}
|
||||
|
||||
impl<T> RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn tokens(&self) -> &Vec<Token<T>> {
|
||||
match self {
|
||||
RawOperation::Insert(tokens)
|
||||
| RawOperation::Delete(tokens)
|
||||
| RawOperation::Equal(tokens) => tokens,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn original_text_length(&self) -> usize {
|
||||
self.tokens().iter().map(Token::get_original_length).sum()
|
||||
}
|
||||
|
||||
pub fn get_original_text(self) -> String { self.tokens().iter().map(Token::original).collect() }
|
||||
|
||||
pub fn is_left_joinable(&self) -> bool {
|
||||
let first_token = self.tokens().first();
|
||||
first_token.is_none_or(super::super::tokenizer::token::Token::get_is_left_joinable)
|
||||
}
|
||||
|
||||
pub fn is_right_joinable(&self) -> bool {
|
||||
let last_token = self.tokens().last();
|
||||
last_token.is_none_or(super::super::tokenizer::token::Token::get_is_right_joinable)
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation. Only operations of the
|
||||
/// same type as self can be used to extend self, otherwise the function
|
||||
/// will panic.
|
||||
pub fn extend(self, other: RawOperation<T>) -> RawOperation<T> {
|
||||
debug_assert!(
|
||||
std::mem::discriminant(&self) == std::mem::discriminant(&other),
|
||||
"Cannot extend operations of different types. This should have been handled before \
|
||||
calling this function."
|
||||
);
|
||||
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => {
|
||||
RawOperation::Insert(tokens1.into_iter().chain(tokens2).collect())
|
||||
}
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => {
|
||||
RawOperation::Delete(tokens1.into_iter().chain(tokens2).collect())
|
||||
}
|
||||
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => {
|
||||
RawOperation::Equal(tokens1.into_iter().chain(tokens2).collect())
|
||||
}
|
||||
_ => unreachable!("Only operations of the same type can be extended"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "y",
|
||||
original: "y",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/diffs/myers.rs
|
||||
expression: result
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "a",
|
||||
original: "a",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "b",
|
||||
original: "b",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Delete(
|
||||
[
|
||||
Token {
|
||||
normalised: "c",
|
||||
original: "c",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Insert(
|
||||
[
|
||||
Token {
|
||||
normalised: "x",
|
||||
original: "x",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
Equal(
|
||||
[
|
||||
Token {
|
||||
normalised: "d",
|
||||
original: "d",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
],
|
||||
),
|
||||
]
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
mod diffs;
|
||||
mod operation_transformation;
|
||||
mod tokenizer;
|
||||
mod utils;
|
||||
|
||||
pub use operation_transformation::{
|
||||
CursorPosition, EditedText, TextWithCursors, reconcile, reconcile_with_cursors,
|
||||
reconcile_with_tokenizer,
|
||||
};
|
||||
pub use tokenizer::{Tokenizer, token::Token};
|
||||
|
|
@ -1,166 +0,0 @@
|
|||
mod cursor;
|
||||
mod edited_text;
|
||||
mod merge_context;
|
||||
mod operation;
|
||||
mod ordered_operation;
|
||||
|
||||
pub use cursor::{CursorPosition, TextWithCursors};
|
||||
pub use edited_text::EditedText;
|
||||
pub use operation::Operation;
|
||||
|
||||
use crate::Tokenizer;
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile(original: &str, left: &str, right: &str) -> String {
|
||||
reconcile_with_cursors(original, left.into(), right.into())
|
||||
.text
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile_with_cursors<'a>(
|
||||
original: &'a str,
|
||||
left: TextWithCursors<'a>,
|
||||
right: TextWithCursors<'a>,
|
||||
) -> TextWithCursors<'static> {
|
||||
let left_operations = EditedText::from_strings(original, left);
|
||||
let right_operations = EditedText::from_strings(original, right);
|
||||
|
||||
let merged_operations = left_operations.merge(right_operations);
|
||||
|
||||
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn reconcile_with_tokenizer<'a, F, T>(
|
||||
original: &str,
|
||||
left: TextWithCursors<'a>,
|
||||
right: TextWithCursors<'a>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> TextWithCursors<'static>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let left_operations = EditedText::from_strings_with_tokenizer(original, left, tokenizer);
|
||||
let right_operations = EditedText::from_strings_with_tokenizer(original, right, tokenizer);
|
||||
|
||||
let merged_operations = left_operations.merge(right_operations);
|
||||
|
||||
TextWithCursors::new_owned(merged_operations.apply(), merged_operations.cursors)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::{fs, ops::Range, path::Path};

    use pretty_assertions::assert_eq;
    use test_case::test_matrix;

    use super::*;
    use crate::CursorPosition;

    /// Both sides edit the text and carry two cursors each; the merged result
    /// must contain all four cursors at their transformed positions.
    #[test]
    fn test_cursor_complex() {
        let original = "this is some complex text to test cursor positions";
        let left = TextWithCursors::new(
            "this is really complex text for testing cursor positions",
            vec![
                CursorPosition {
                    id: 0,
                    char_index: 8,
                }, // after "this is "
                CursorPosition {
                    id: 1,
                    char_index: 22,
                }, // after "this is really complex"
            ],
        );
        let right = TextWithCursors::new(
            "that was some complex sample to test cursor movements",
            vec![
                CursorPosition {
                    id: 2,
                    char_index: 5,
                }, // after "that "
                CursorPosition {
                    id: 3,
                    char_index: 29,
                }, // after "that was some complex sample "
            ],
        );

        let merged = reconcile_with_cursors(original, left, right);

        assert_eq!(
            merged,
            TextWithCursors::new(
                "that was really complex sample for testing cursor movements",
                vec![
                    CursorPosition {
                        id: 2,
                        char_index: 5
                    }, // unchanged
                    CursorPosition {
                        id: 0,
                        char_index: 9
                    }, // before "really"
                    CursorPosition {
                        id: 1,
                        char_index: 23
                    }, // after "that was really complex"
                    CursorPosition {
                        id: 3,
                        char_index: 43
                    }, // before "cursor movements"
                ]
            )
        );
    }

    /// Smoke test: merging arbitrary slices of large, unrelated texts must
    /// not panic. Each file is cut down to the characters selected by the
    /// matching range before merging.
    #[ignore = "expensive to run, only run in CI"]
    #[test_matrix( [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [
        "pride_and_prejudice.txt",
        "room_with_a_view.txt",
        "kun_lu.txt",
        "blns.txt"
    ], [0..10000, 10000..20000], [0..10000, 10000..20000], [0..10000, 10000..20000])]
    fn test_merge_files_without_panic(
        file_name_1: &str,
        file_name_2: &str,
        file_name_3: &str,
        range_1: Range<usize>,
        range_2: Range<usize>,
        range_3: Range<usize>,
    ) {
        let files = [file_name_1, file_name_2, file_name_3];
        let ranges = [range_1, range_2, range_3];

        let root = Path::new("tests/resources/");

        let contents = files
            .iter()
            .zip(ranges.iter())
            .map(|(file, range)| {
                let path = root.join(file);
                fs::read_to_string(&path)
                    .unwrap()
                    .chars()
                    .skip(range.start)
                    // Take the *length* of the range, not its end index;
                    // otherwise `10000..20000` would select 20000 characters.
                    .take(range.len())
                    .collect::<String>()
            })
            .collect::<Vec<_>>();

        let _ = reconcile(&contents[0], &contents[1], &contents[2]);
    }
}
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The position of an identifiable cursor in a text document, expressed as a
/// (UTF-8) character index.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CursorPosition {
    /// Identifier that distinguishes this cursor from the others.
    pub id: usize,
    /// Character index of the cursor within the text.
    pub char_index: usize,
}

impl CursorPosition {
    /// Returns a copy of this cursor, same `id`, placed at `index`.
    #[must_use]
    pub fn with_index(&self, index: usize) -> Self {
        Self {
            char_index: index,
            ..self.clone()
        }
    }
}
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct TextWithCursors<'a> {
|
||||
pub text: Cow<'a, str>,
|
||||
pub cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl<'a> TextWithCursors<'a> {
|
||||
#[must_use]
|
||||
pub fn new(text: &'a str, cursors: Vec<CursorPosition>) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn new_owned(text: String, cursors: Vec<CursorPosition>) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for TextWithCursors<'a> {
|
||||
fn from(text: &'a str) -> Self {
|
||||
Self {
|
||||
text: text.into(),
|
||||
cursors: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,381 +0,0 @@
|
|||
use core::iter;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{CursorPosition, Operation, TextWithCursors, ordered_operation::OrderedOperation};
|
||||
use crate::{
|
||||
diffs::{myers::diff, raw_operation::RawOperation},
|
||||
operation_transformation::merge_context::MergeContext,
|
||||
tokenizer::{Tokenizer, word_tokenizer::word_tokenizer},
|
||||
utils::{merge_iters::MergeSorted as _, side::Side, string_builder::StringBuilder},
|
||||
};
|
||||
|
||||
/// A sequence of operations that can be applied to a text document.
|
||||
/// `EditedText` supports merging two sequences of operations using the
|
||||
/// principle of Operational Transformation.
|
||||
///
|
||||
/// It's mainly created through the `from_strings` method, then merged with
|
||||
/// another `EditedText` derived from the same original text and then applied to
|
||||
/// the original text to get the reconciled text of concurrent edits.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
text: &'a str,
|
||||
operations: Vec<OrderedOperation<T>>,
|
||||
pub(crate) cursors: Vec<CursorPosition>,
|
||||
}
|
||||
|
||||
impl<'a> EditedText<'a, String> {
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The default
|
||||
/// word tokenizer is used to tokenize the text which splits the text on
|
||||
/// whitespaces.
|
||||
#[must_use]
|
||||
pub fn from_strings(original: &'a str, updated: TextWithCursors<'a>) -> Self {
|
||||
Self::from_strings_with_tokenizer(original, updated, &word_tokenizer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> EditedText<'a, T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// Create an `EditedText` from the given original (old) and updated (new)
|
||||
/// strings. The returned `EditedText` represents the changes from the
|
||||
/// original to the updated text. When the return value is applied to
|
||||
/// the original text, it will result in the updated text. The tokenizer
|
||||
/// function is used to tokenize the text.
|
||||
pub fn from_strings_with_tokenizer(
|
||||
original: &'a str,
|
||||
updated: TextWithCursors<'a>,
|
||||
tokenizer: &Tokenizer<T>,
|
||||
) -> Self {
|
||||
let original_tokens = (tokenizer)(original);
|
||||
let updated_tokens = (tokenizer)(&updated.text);
|
||||
|
||||
let diff: Vec<RawOperation<T>> = diff(&original_tokens, &updated_tokens);
|
||||
|
||||
Self::new(
|
||||
original,
|
||||
Self::cook_operations(Self::elongate_operations(diff)).collect(),
|
||||
updated.cursors,
|
||||
)
|
||||
}
|
||||
|
||||
fn elongate_operations<I>(raw_operations: I) -> Vec<RawOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
// This might look bad, but this makes sense. The inserts and deltes can be
|
||||
// interleaved, such as: IDIDID and we need to turn this into IIIDDD.
|
||||
// So we need to keep track of both the last insert and delete operations, not
|
||||
// just the last one.
|
||||
let mut maybe_previous_insert: Option<RawOperation<T>> = None;
|
||||
let mut maybe_previous_delete: Option<RawOperation<T>> = None;
|
||||
|
||||
let mut result: Vec<RawOperation<T>> = raw_operations
|
||||
.into_iter()
|
||||
.flat_map(|next| match next {
|
||||
RawOperation::Insert(..) => match maybe_previous_insert.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_insert = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_insert = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Delete(..) => match maybe_previous_delete.take() {
|
||||
Some(prev) if prev.is_right_joinable() && next.is_left_joinable() => {
|
||||
maybe_previous_delete = Some(prev.extend(next));
|
||||
Box::new(iter::empty()) as Box<dyn Iterator<Item = RawOperation<T>>>
|
||||
}
|
||||
prev => {
|
||||
maybe_previous_delete = Some(next);
|
||||
Box::new(prev.into_iter())
|
||||
}
|
||||
},
|
||||
RawOperation::Equal(..) => Box::new(
|
||||
maybe_previous_insert
|
||||
.take()
|
||||
.into_iter()
|
||||
.chain(maybe_previous_delete.take())
|
||||
.chain(iter::once(next)),
|
||||
)
|
||||
as Box<dyn Iterator<Item = RawOperation<T>>>,
|
||||
})
|
||||
.collect();
|
||||
|
||||
if let Some(prev) = maybe_previous_insert {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
if let Some(prev) = maybe_previous_delete {
|
||||
result.push(prev);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// Turn raw operations into ordered operations while keeping track of old & new
|
||||
// indexes.
|
||||
fn cook_operations<I>(raw_operations: I) -> impl Iterator<Item = OrderedOperation<T>>
|
||||
where
|
||||
I: IntoIterator<Item = RawOperation<T>>,
|
||||
{
|
||||
let mut new_index = 0; // this is the start index of the operation on the new text
|
||||
let mut order = 0; // this is the start index of the operation on the original text
|
||||
|
||||
raw_operations.into_iter().filter_map(move |raw_operation| {
|
||||
let length = raw_operation.original_text_length();
|
||||
|
||||
match raw_operation {
|
||||
RawOperation::Equal(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_equal_with_text(
|
||||
new_index,
|
||||
raw_operation.get_original_text(),
|
||||
)
|
||||
} else {
|
||||
Operation::create_equal(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Insert(tokens) => {
|
||||
let op = Operation::create_insert(new_index, tokens)
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
new_index += length;
|
||||
|
||||
op
|
||||
}
|
||||
RawOperation::Delete(..) => {
|
||||
let op = if cfg!(debug_assertions) {
|
||||
Operation::create_delete_with_text(
|
||||
new_index,
|
||||
raw_operation.get_original_text(),
|
||||
)
|
||||
} else {
|
||||
Operation::create_delete(new_index, length)
|
||||
}
|
||||
.map(|operation| OrderedOperation { order, operation });
|
||||
|
||||
order += length;
|
||||
|
||||
op
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new `EditedText` with the given operations.
|
||||
/// The operations must be in the order in which they are meant to be
|
||||
/// applied. The operations must not overlap.
|
||||
fn new(
|
||||
text: &'a str,
|
||||
operations: Vec<OrderedOperation<T>>,
|
||||
mut cursors: Vec<CursorPosition>,
|
||||
) -> Self {
|
||||
operations
|
||||
.iter()
|
||||
.zip(operations.iter().skip(1))
|
||||
.for_each(|(previous, next)| {
|
||||
debug_assert!(
|
||||
previous.operation.start_index() <= next.operation.start_index(),
|
||||
"{} must not come before {} yet it does",
|
||||
previous.operation,
|
||||
next.operation
|
||||
);
|
||||
});
|
||||
|
||||
cursors.sort_by_key(|cursor| cursor.char_index);
|
||||
|
||||
Self {
|
||||
text,
|
||||
operations,
|
||||
cursors,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn merge(self, other: Self) -> Self {
|
||||
debug_assert_eq!(
|
||||
self.text, other.text,
|
||||
"`EditedText`-s must be derived from the same text to be mergable"
|
||||
);
|
||||
|
||||
let mut left_merge_context = MergeContext::default();
|
||||
let mut right_merge_context = MergeContext::default();
|
||||
|
||||
let mut merged_cursors = Vec::with_capacity(self.cursors.len() + other.cursors.len());
|
||||
let mut left_cursors = self.cursors.into_iter().peekable();
|
||||
let mut right_cursors = other.cursors.into_iter().peekable();
|
||||
|
||||
let merged_operations: Vec<OrderedOperation<T>> = self
|
||||
.operations
|
||||
.into_iter()
|
||||
// The current text is always the left; the other operation is the right side.
|
||||
.map(|op| (op, Side::Left))
|
||||
.merge_sorted_by_key(
|
||||
other.operations.into_iter().map(|op| (op, Side::Right)),
|
||||
|(operation, _)| {
|
||||
(
|
||||
operation.order,
|
||||
operation.operation.start_index(),
|
||||
// Make sure that the ordering is deterministic regardless which text
|
||||
// is left or right.
|
||||
match &operation.operation {
|
||||
Operation::Equal { index, .. } => index.to_string(),
|
||||
Operation::Insert { text, .. } => text
|
||||
.iter()
|
||||
.map(crate::tokenizer::token::Token::original)
|
||||
.collect::<String>(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => deleted_character_count.to_string(),
|
||||
},
|
||||
)
|
||||
},
|
||||
)
|
||||
.flat_map(|(OrderedOperation { order, operation }, side)| {
|
||||
let original_start = operation.start_index() as i64;
|
||||
let original_end = operation.end_index();
|
||||
let original_length = operation.len() as i64;
|
||||
|
||||
let result = match side {
|
||||
Side::Left => operation.merge_operations_with_context(
|
||||
&mut right_merge_context,
|
||||
&mut left_merge_context,
|
||||
),
|
||||
Side::Right => operation.merge_operations_with_context(
|
||||
&mut left_merge_context,
|
||||
&mut right_merge_context,
|
||||
),
|
||||
};
|
||||
|
||||
if let Some(ref op @ (Operation::Insert { .. } | Operation::Equal { .. })) = result
|
||||
{
|
||||
let shift = op.start_index() as i64 - original_start + op.len() as i64
|
||||
- original_length;
|
||||
match side {
|
||||
Side::Left => {
|
||||
while let Some(cursor) =
|
||||
left_cursors.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
Side::Right => {
|
||||
while let Some(cursor) = right_cursors
|
||||
.next_if(|cursor| cursor.char_index <= original_end + 1)
|
||||
{
|
||||
merged_cursors.push(cursor.with_index(
|
||||
(op.start_index() as i64).max(cursor.char_index as i64 + shift)
|
||||
as usize,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
.map(|operation| OrderedOperation { order, operation })
|
||||
.into_iter()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let last_index = merged_operations
|
||||
.iter()
|
||||
.filter(|operation| {
|
||||
matches!(
|
||||
operation.operation,
|
||||
Operation::Insert { .. } | Operation::Equal { .. }
|
||||
)
|
||||
})
|
||||
.next_back()
|
||||
.map_or(0, |op| op.operation.end_index());
|
||||
|
||||
for cursor in left_cursors.chain(right_cursors) {
|
||||
merged_cursors.push(cursor.with_index(last_index));
|
||||
}
|
||||
|
||||
Self::new(self.text, merged_operations, merged_cursors)
|
||||
}
|
||||
|
||||
/// Apply the operations to the text and return the resulting text.
|
||||
#[must_use]
|
||||
pub fn apply(&self) -> String {
|
||||
let mut builder: StringBuilder<'_> = StringBuilder::new(self.text);
|
||||
|
||||
for OrderedOperation { operation, .. } in &self.operations {
|
||||
builder = operation.apply(builder);
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use insta::assert_debug_snapshot;
    use pretty_assertions::assert_eq;

    use super::*;

    /// Diffing two different strings yields operations that reproduce the
    /// second string when applied.
    #[test]
    fn test_calculate_operations() {
        let left = "hello world! How are you? Adam";
        let right = "Hello, my friend! How are you doing? Albert";

        let operations = EditedText::from_strings(left, right.into());

        assert_debug_snapshot!(operations);

        assert_eq!(operations.apply(), right);
    }

    /// Diffing a string against itself yields operations that leave it
    /// unchanged.
    #[test]
    fn test_calculate_operations_with_no_diff() {
        let text = "hello world!";

        let operations = EditedText::from_strings(text, text.into());

        assert_debug_snapshot!(operations);

        assert_eq!(operations.apply(), text);
    }

    /// Two edits of the same original that both append text are merged so
    /// that both additions are kept.
    #[test]
    fn test_calculate_operations_with_insert() {
        let original = "hello world! ...";
        let left = "Hello world! I'm Andras.";
        let right = "Hello world! How are you?";
        let expected = "Hello world! How are you? I'm Andras.";

        let operations_1 = EditedText::from_strings(original, left.into());
        let operations_2 = EditedText::from_strings(original, right.into());

        let operations = operations_1.merge(operations_2);
        assert_eq!(operations.apply(), expected);
    }
}
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
use core::fmt::Debug;
|
||||
|
||||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
last_operation: Option<Operation<T>>,
|
||||
pub shift: i64,
|
||||
}
|
||||
|
||||
impl<T> Default for MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn default() -> Self {
|
||||
MergeContext {
|
||||
last_operation: None,
|
||||
shift: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> MergeContext<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn last_operation(&self) -> Option<&Operation<T>> { self.last_operation.as_ref() }
|
||||
|
||||
pub fn replace_last_operation(&mut self, operation: Option<Operation<T>>) {
|
||||
self.last_operation = operation;
|
||||
}
|
||||
|
||||
/// Replace the last delete operation (if there was one) with a new one
|
||||
/// while applying it to the `shift` in case the last operation
|
||||
/// was a delete.
|
||||
pub fn consume_and_replace_last_operation(&mut self, operation: Option<Operation<T>>) {
|
||||
if let Some(Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
}) = self.last_operation.take()
|
||||
{
|
||||
self.shift -= deleted_character_count as i64;
|
||||
}
|
||||
|
||||
self.last_operation = operation;
|
||||
}
|
||||
|
||||
/// Remove the last operation (if there was one) in case it is behind the
|
||||
/// threshold operation. This updates the `shift` in case the last operation
|
||||
/// was a delete.
|
||||
pub fn consume_last_operation_if_it_is_too_behind(&mut self, threshold_index: i64) {
|
||||
if let Some(last_operation) = self.last_operation.as_ref() {
|
||||
if let Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} = last_operation
|
||||
{
|
||||
if threshold_index + self.shift > last_operation.end_index() as i64 {
|
||||
self.shift -= *deleted_character_count as i64;
|
||||
self.last_operation = None;
|
||||
}
|
||||
} else if let Operation::Insert { .. } = last_operation
|
||||
&& threshold_index + self.shift - last_operation.len() as i64
|
||||
> last_operation.end_index() as i64
|
||||
{
|
||||
self.last_operation = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,513 +0,0 @@
|
|||
use core::fmt::{Debug, Display};
|
||||
use std::ops::Range;
|
||||
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::merge_context::MergeContext;
|
||||
use crate::{
|
||||
Token,
|
||||
utils::{
|
||||
find_longest_prefix_contained_within::find_longest_prefix_contained_within,
|
||||
string_builder::StringBuilder,
|
||||
},
|
||||
};
|
||||
|
||||
/// Represents a change that can be applied on a `StringBuilder`.
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
Equal {
|
||||
index: usize,
|
||||
length: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: Option<String>,
|
||||
},
|
||||
|
||||
Insert {
|
||||
index: usize,
|
||||
text: Vec<Token<T>>,
|
||||
},
|
||||
|
||||
Delete {
|
||||
index: usize,
|
||||
deleted_character_count: usize,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
impl<T> Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
/// Creates an equal operation with the given index.
|
||||
/// This operation is used to indicate that the text at the given index
|
||||
/// is unchanged.
|
||||
pub fn create_equal(index: usize, length: usize) -> Option<Self> {
|
||||
if length == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_equal_with_text(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Equal {
|
||||
index,
|
||||
length: text.chars().count(),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text: Some(text),
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates an insert operation with the given index and text.
|
||||
/// If the text is empty (meaning that the operation would be a no-op),
|
||||
/// returns None.
|
||||
pub fn create_insert(index: usize, text: Vec<Token<T>>) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Insert { index, text })
|
||||
}
|
||||
|
||||
/// Creates a delete operation with the given index and number of
|
||||
/// to-be-deleted characters. If the operation would delete 0 (meaning
|
||||
/// that the operation would be a no-op), returns None.
|
||||
pub fn create_delete(index: usize, deleted_character_count: usize) -> Option<Self> {
|
||||
if deleted_character_count == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_delete_with_text(index: usize, text: String) -> Option<Self> {
|
||||
if text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Operation::Delete {
|
||||
index,
|
||||
deleted_character_count: text.chars().count(),
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text: Some(text),
|
||||
})
|
||||
}
|
||||
|
||||
/// Applies the operation to the given `StringBuilder`, returning the
|
||||
/// modified `StringBuilder`.
|
||||
///
|
||||
/// When compiled in debug mode, panics if a delete operation is attempted
|
||||
/// on a range of text that does not match the text to be deleted.
|
||||
pub fn apply<'a>(&self, mut builder: StringBuilder<'a>) -> StringBuilder<'a> {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(
|
||||
text.as_ref()
|
||||
.is_none_or(|text| builder.get_slice(self.range()) == *text),
|
||||
"Text which is supposed to be equal does not match the text in the range"
|
||||
);
|
||||
|
||||
return builder;
|
||||
}
|
||||
Operation::Insert { text, .. } => builder.insert(
|
||||
self.start_index(),
|
||||
&text.iter().map(Token::original).collect::<String>(),
|
||||
),
|
||||
Operation::Delete {
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.is_none_or(|text| builder.get_slice(self.range()) == *text),
|
||||
"Text to delete does not match the text in the range"
|
||||
);
|
||||
|
||||
builder.delete(self.range());
|
||||
}
|
||||
}
|
||||
|
||||
builder
|
||||
}
|
||||
|
||||
/// Returns the index of the first character that the operation affects.
|
||||
pub fn start_index(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { index, .. }
|
||||
| Operation::Insert { index, .. }
|
||||
| Operation::Delete { index, .. } => *index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the index of the last character that the operation affects.
|
||||
pub fn end_index(&self) -> usize {
|
||||
debug_assert!(
|
||||
self.len() > 0,
|
||||
" len() must be greater than 0 because operations must be non-empty"
|
||||
);
|
||||
self.start_index() + self.len() - 1
|
||||
}
|
||||
|
||||
/// Returns the range of indices of characters that the operation affects.
|
||||
#[allow(clippy::range_plus_one)]
|
||||
pub fn range(&self) -> Range<usize> { self.start_index()..self.end_index() + 1 }
|
||||
|
||||
/// Returns the number of affected characters. It is always greater than 0
|
||||
/// because empty operations cannot be created.
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Operation::Equal { length, .. } => *length,
|
||||
Operation::Insert { text, .. } => text.iter().map(Token::get_original_length).sum(),
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
..
|
||||
} => *deleted_character_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new operation with the same type and text but with the given
|
||||
/// index.
|
||||
pub fn with_index(self, index: usize) -> Self {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
..
|
||||
} => Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
},
|
||||
Operation::Insert { text, .. } => Operation::Insert { index, text },
|
||||
Operation::Delete {
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
..
|
||||
} => Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new operation with the same type and text but with the index
|
||||
/// shifted by the given offset. The offset can be negative but the
|
||||
/// resulting index must be non-negative.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// In debug mode, panics if the resulting index is negative.
|
||||
pub fn with_shifted_index(self, offset: i64) -> Self {
|
||||
let index = self.start_index() as i64 + offset;
|
||||
debug_assert!(index >= 0, "Shifted index must be non-negative");
|
||||
|
||||
self.with_index(index as usize)
|
||||
}
|
||||
|
||||
/// Merges the operation with the given context, producing a new operation
|
||||
/// and updating the context. This implements a comples FSM that handles
|
||||
/// the merging of operations in a way that is consistent with the text.
|
||||
/// The contexts are updated in-place.
|
||||
#[allow(clippy::too_many_lines)]
|
||||
pub fn merge_operations_with_context(
|
||||
self,
|
||||
affecting_context: &mut MergeContext<T>,
|
||||
produced_context: &mut MergeContext<T>,
|
||||
) -> Option<Operation<T>> {
|
||||
affecting_context.consume_last_operation_if_it_is_too_behind(self.start_index() as i64);
|
||||
let operation = self.with_shifted_index(affecting_context.shift);
|
||||
|
||||
match (operation, affecting_context.last_operation()) {
|
||||
(operation @ Operation::Insert { .. }, None | Some(Operation::Equal { .. })) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
|
||||
Some(operation)
|
||||
}
|
||||
|
||||
(
|
||||
Operation::Insert { text, index },
|
||||
Some(Operation::Insert {
|
||||
text: previous_inserted_text,
|
||||
..
|
||||
}),
|
||||
) => {
|
||||
// In case the current insert's prefix appears in the previously inserted text,
|
||||
// we can trim the current insert to only include the non-overlapping part.
|
||||
// This way, we don't end up duplicating text.
|
||||
let offset_in_tokens =
|
||||
find_longest_prefix_contained_within(previous_inserted_text, &text);
|
||||
let offset_in_length = text
|
||||
.iter()
|
||||
.take(offset_in_tokens)
|
||||
.map(Token::get_original_length)
|
||||
.sum::<usize>();
|
||||
let trimmed_operation =
|
||||
Operation::create_insert(index, text[offset_in_tokens..].to_vec());
|
||||
|
||||
affecting_context.shift -= offset_in_length as i64;
|
||||
produced_context.shift += trimmed_operation
|
||||
.as_ref()
|
||||
.map(Operation::len)
|
||||
.unwrap_or_default() as i64;
|
||||
produced_context.consume_and_replace_last_operation(trimmed_operation.clone());
|
||||
|
||||
trimmed_operation
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Delete { .. },
|
||||
None | Some(Operation::Insert { .. } | Operation::Equal { .. }),
|
||||
) => {
|
||||
produced_context.consume_and_replace_last_operation(Some(operation.clone()));
|
||||
Some(operation)
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Insert { .. },
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
produced_context.shift += operation.len() as i64;
|
||||
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
|
||||
|
||||
let moved_operation = operation.with_index(last_delete.start_index());
|
||||
|
||||
affecting_context.replace_last_operation(Operation::create_delete(
|
||||
moved_operation.end_index() + 1,
|
||||
(last_delete.len() as i64 - difference) as usize,
|
||||
));
|
||||
affecting_context.shift -= difference;
|
||||
|
||||
produced_context.consume_and_replace_last_operation(Some(moved_operation.clone()));
|
||||
|
||||
Some(moved_operation)
|
||||
}
|
||||
|
||||
(
|
||||
operation @ Operation::Delete { .. },
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let difference = operation.start_index() as i64 - last_delete.start_index() as i64;
|
||||
|
||||
let updated_delete = Operation::create_delete(
|
||||
last_delete.start_index(),
|
||||
0.max(operation.end_index() as i64 - last_delete.end_index() as i64) as usize,
|
||||
);
|
||||
|
||||
affecting_context.replace_last_operation(Operation::create_delete(
|
||||
last_delete.start_index(),
|
||||
0.max(last_delete.end_index() as i64 - operation.end_index() as i64) as usize,
|
||||
));
|
||||
affecting_context.shift -= difference;
|
||||
|
||||
produced_context.consume_and_replace_last_operation(updated_delete.clone());
|
||||
|
||||
updated_delete
|
||||
}
|
||||
(
|
||||
ref operation @ Operation::Equal {
|
||||
length,
|
||||
#[cfg(debug_assertions)]
|
||||
ref text,
|
||||
..
|
||||
},
|
||||
Some(last_delete @ Operation::Delete { .. }),
|
||||
) => {
|
||||
debug_assert!(
|
||||
last_delete.range().contains(&operation.start_index()),
|
||||
"There is a last delete ({last_delete}) but the operation ({operation}) is \
|
||||
not contained in it"
|
||||
);
|
||||
|
||||
let overlap = (length as i64)
|
||||
.min(last_delete.end_index() as i64 - operation.start_index() as i64 + 1);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
let result = text.as_ref().map_or_else(
|
||||
|| {
|
||||
Operation::create_equal(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
(length as i64 - overlap) as usize,
|
||||
)
|
||||
},
|
||||
|text| {
|
||||
Operation::create_equal_with_text(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
text.chars().skip(overlap as usize).collect::<String>(),
|
||||
)
|
||||
},
|
||||
);
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
let result = Operation::create_equal(
|
||||
operation.end_index().min(last_delete.end_index()),
|
||||
(length as i64 - overlap) as usize,
|
||||
);
|
||||
|
||||
result
|
||||
}
|
||||
(operation @ Operation::Equal { .. }, _) => Some(operation),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self {
|
||||
Operation::Equal {
|
||||
index,
|
||||
length,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
text,
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<equal {} from index {}>",
|
||||
text.as_ref()
|
||||
.map(|text| format!("'{text}'"))
|
||||
.unwrap_or(format!("{length} characters")),
|
||||
index
|
||||
)?;
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(f, "<equal {length} from index {index}>")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Operation::Insert { index, text } => {
|
||||
write!(
|
||||
f,
|
||||
"<insert '{}' from index {}>",
|
||||
text.iter().map(Token::original).collect::<String>(),
|
||||
index
|
||||
)
|
||||
}
|
||||
Operation::Delete {
|
||||
index,
|
||||
deleted_character_count,
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
deleted_text,
|
||||
} => {
|
||||
#[cfg(debug_assertions)]
|
||||
write!(
|
||||
f,
|
||||
"<delete {} from index {}>",
|
||||
deleted_text
|
||||
.as_ref()
|
||||
.map(|text| format!("'{text}'"))
|
||||
.unwrap_or(format!("{deleted_character_count} characters")),
|
||||
index
|
||||
)?;
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
write!(
|
||||
f,
|
||||
"<delete {deleted_character_count} characters from index {index}>",
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Debug for Operation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!(f, "{self}") }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Shifting an operation to a negative index trips the debug assertion.
    #[test]
    #[should_panic(expected = "Shifted index must be non-negative")]
    fn test_shifting_error() {
        let insert = Operation::create_insert(1, vec!["hi".into()]).unwrap();

        insta::assert_debug_snapshot!(insert.with_shifted_index(-2));
    }

    /// A delete created together with its text removes that text.
    #[test]
    fn test_apply_delete_with_create() {
        let delete =
            Operation::<()>::create_delete_with_text(5, " world".to_owned()).unwrap();

        let result = delete.apply(StringBuilder::new("hello world")).build();

        assert_eq!(result, "hello");
    }

    /// An insert at the end of the text appends its tokens.
    #[test]
    fn test_apply_insert() {
        let insert = Operation::create_insert(5, vec![" my friend".into()]).unwrap();

        let result = insert.apply(StringBuilder::new("hello")).build();

        assert_eq!(result, "hello my friend");
    }
}
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::operation_transformation::Operation;
|
||||
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct OrderedOperation<T>
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub order: usize,
|
||||
pub operation: Operation<T>,
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world! How are you? Adam",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <insert 'Hello, my friend!' from index 0>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <delete 'hello world!' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 12,
|
||||
operation: <equal ' ' from index 17>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 13,
|
||||
operation: <equal 'How' from index 18>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 16,
|
||||
operation: <equal ' ' from index 21>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 17,
|
||||
operation: <equal 'are' from index 22>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 20,
|
||||
operation: <insert ' you doing? Albert' from index 25>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 20,
|
||||
operation: <delete ' you? Adam' from index 43>,
|
||||
},
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/operation_transformation/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world!",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: <equal 'hello' from index 0>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 5,
|
||||
operation: <equal ' ' from index 5>,
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 6,
|
||||
operation: <equal 'world!' from index 6>,
|
||||
},
|
||||
],
|
||||
cursors: [],
|
||||
}
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/operations/edited_text.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
text: "hello world! How are you? Adam",
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 21,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 26,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/operations/operation_sequence.rs
|
||||
expression: operations
|
||||
snapshot_kind: text
|
||||
---
|
||||
EditedText {
|
||||
operations: [
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Insert {
|
||||
index: 0,
|
||||
text: "Hello, my friend! ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 0,
|
||||
operation: Delete {
|
||||
index: 18,
|
||||
deleted_character_count: 13,
|
||||
deleted_text: Some(
|
||||
"hello world! ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 21,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
"you? ",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 26,
|
||||
operation: Delete {
|
||||
index: 26,
|
||||
deleted_character_count: 5,
|
||||
deleted_text: Some(
|
||||
" Adam",
|
||||
),
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 26,
|
||||
text: "you ",
|
||||
},
|
||||
},
|
||||
OrderedOperation {
|
||||
order: 31,
|
||||
operation: Insert {
|
||||
index: 30,
|
||||
text: "doing? Albert",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
use token::Token;
|
||||
|
||||
pub mod token;
|
||||
pub mod word_tokenizer;
|
||||
|
||||
/// Signature of a tokenizer: a callable that takes a string slice and returns
/// a vector of [`Token`]s.
pub type Tokenizer<T> = dyn Fn(&str) -> Vec<Token<T>>;
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[]
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" what? \")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " what?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "what?",
|
||||
original: "what?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -1,55 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " hello,",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " \nwhere",
|
||||
original: " \n",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " are",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " you?",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: "hello,",
|
||||
},
|
||||
Token {
|
||||
normalised: " \n",
|
||||
original: " \n",
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: "where",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: "are",
|
||||
},
|
||||
Token {
|
||||
normalised: " ",
|
||||
original: " ",
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: "you?",
|
||||
},
|
||||
]
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"Hi there!\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "Hi",
|
||||
original: "Hi",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: " there!",
|
||||
original: " ",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
Token {
|
||||
normalised: "there!",
|
||||
original: "there!",
|
||||
is_left_joinable: true,
|
||||
is_right_joinable: true,
|
||||
},
|
||||
]
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A unit of text paired with the value used to compare it.
///
/// Diffing looks only at `normalised`; `original` is the text that gets
/// inserted into or deleted from the document when `Operation`-s are applied.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone)]
pub struct Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Comparison key of the token; the only field equality looks at.
    pub normalised: T,

    /// The text as it appears in the document.
    original: String,

    /// Whether the token may be joined with the token before it.
    is_left_joinable: bool,

    /// Whether the token may be joined with the token after it.
    is_right_joinable: bool,
}

impl From<&str> for Token<String> {
    /// Builds a token whose normalised and original forms are both `text`
    /// and which is joinable on both sides.
    fn from(text: &str) -> Self {
        let owned = String::from(text);
        Self::new(owned.clone(), owned, true, true)
    }
}

impl<T> Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Assembles a token from its four parts without any validation.
    pub fn new(
        normalised: T,
        original: String,
        is_left_joinable: bool,
        is_right_joinable: bool,
    ) -> Self {
        Self {
            is_right_joinable,
            is_left_joinable,
            original,
            normalised,
        }
    }

    /// The text as it appears in the document.
    pub fn original(&self) -> &str { self.original.as_str() }

    /// The comparison key.
    pub fn normalised(&self) -> &T { &self.normalised }

    /// Length of the original text in `char`s (not bytes).
    pub fn get_original_length(&self) -> usize { self.original.chars().count() }

    /// Whether the token may be joined with the token before it.
    pub fn get_is_left_joinable(&self) -> bool { self.is_left_joinable }

    /// Whether the token may be joined with the token after it.
    pub fn get_is_right_joinable(&self) -> bool { self.is_right_joinable }
}

impl<T> PartialEq for Token<T>
where
    T: PartialEq + Clone + std::fmt::Debug,
{
    /// Two tokens are equal when their normalised forms are equal; the
    /// original text and the joinable flags are ignored.
    fn eq(&self, other: &Self) -> bool { self.normalised.eq(&other.normalised) }
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits on word boundaries creating alternating words and whitespaces with
|
||||
/// the whitesspaces getting unique IDs.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```not_rust
|
||||
/// "Hi there!" -> ["Hi", " ", "there!"]
|
||||
/// ```
|
||||
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
||||
let mut result: Vec<Token<String>> = Vec::new();
|
||||
|
||||
let mut previous_boundary_index = 0;
|
||||
let mut previous_char_is_whitespace = text.chars().next().is_none_or(char::is_whitespace);
|
||||
|
||||
for (i, c) in text.char_indices() {
|
||||
let is_current_char_whitespace = c.is_whitespace();
|
||||
if previous_char_is_whitespace != is_current_char_whitespace {
|
||||
result.push(text[previous_boundary_index..i].into());
|
||||
previous_boundary_index = i;
|
||||
}
|
||||
|
||||
previous_char_is_whitespace = is_current_char_whitespace;
|
||||
}
|
||||
|
||||
if previous_boundary_index < text.len() {
|
||||
result.push(text[previous_boundary_index..].into());
|
||||
}
|
||||
|
||||
if result.is_empty() {
|
||||
return result;
|
||||
}
|
||||
|
||||
for i in 0..result.len() - 1 {
|
||||
if result[i].original().chars().all(char::is_whitespace) {
|
||||
result[i].normalised = result[i].normalised().to_owned() + result[i + 1].original();
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
||||
use super::*;
|
||||
|
||||
// Snapshot-tests the tokenizer on a plain sentence, the empty string, an
// input wrapped in whitespace, and an input containing a newline.
// NOTE(review): the snapshots are unnamed, so they are presumably matched to
// the stored snapshot files by call order -- confirm before reordering or
// inserting assertions.
#[test]
|
||||
fn test_with_snapshots() {
|
||||
assert_debug_snapshot!(word_tokenizer("Hi there!"));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(""));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" what? "));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?"));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
pub mod common_prefix_len;
|
||||
pub mod common_suffix_len;
|
||||
pub mod find_longest_prefix_contained_within;
|
||||
pub mod merge_iters;
|
||||
pub mod side;
|
||||
pub mod string_builder;
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
use core::ops::{Index, Range};
|
||||
|
||||
/// Counts how many leading positions of `new_range` and `old_range` index
/// equal elements in `new` and `old` respectively.
///
/// The two ranges are walked in lockstep; counting stops at the first
/// mismatch or as soon as either range is exhausted.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_prefix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut matched = 0;
    for (new_idx, old_idx) in new_range.zip(old_range) {
        if new[new_idx] != old[old_idx] {
            break;
        }
        matched += 1;
    }
    matched
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Covers empty inputs, a partial match, no match, and offset ranges.
    #[test]
    fn test_common_prefix_len() {
        // (old, old_range, new, new_range, expected prefix length)
        let cases = [
            ("", 0..0, "", 0..0, 0),
            ("foobarbaz", 0..9, "foobarblah", 0..10, 7),
            ("foobarbaz", 0..9, "blablabla", 0..9, 0),
            ("foobarbaz", 3..9, "foobarblah", 3..10, 4),
        ];

        for (old, old_range, new, new_range, expected) in cases {
            assert_eq!(
                common_prefix_len(old.as_bytes(), old_range, new.as_bytes(), new_range),
                expected
            );
        }
    }
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
use core::ops::{Index, Range};
|
||||
|
||||
/// Counts how many trailing positions of `new_range` and `old_range` index
/// equal elements in `new` and `old` respectively.
///
/// Both ranges are walked backwards in lockstep; counting stops at the first
/// mismatch or as soon as either range is exhausted.
/// Copied from <https://github.com/mitsuhiko/similar/blob/7e15c44de11a1cd61e1149189929e189ef977fd8/src/algorithms/utils.rs>
pub fn common_suffix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let mut matched = 0;
    for (new_idx, old_idx) in new_range.rev().zip(old_range.rev()) {
        if new[new_idx] != old[old_idx] {
            break;
        }
        matched += 1;
    }
    matched
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Covers empty inputs, a full match, no match, and offset ranges.
    #[test]
    fn test_common_suffix_len() {
        // (old, old_range, new, new_range, expected suffix length)
        let cases = [
            ("", 0..0, "", 0..0, 0),
            ("1234", 0..4, "X0001234", 0..8, 4),
            ("1234", 0..4, "Xxxx", 0..4, 0),
            ("1234", 2..4, "01234", 2..5, 2),
        ];

        for (old, old_range, new, new_range, expected) in cases {
            assert_eq!(
                common_suffix_len(old.as_bytes(), old_range, new.as_bytes(), new_range),
                expected
            );
        }
    }
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
use crate::Token;
|
||||
|
||||
/// Given two lists of tokens, returns `length` where `old` list somewhere
|
||||
/// within contains the `length` prefix of the `new` list.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [9, 0, 2, 5, 1]
|
||||
/// ```
|
||||
/// > results in an length of 4
|
||||
///
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [0, 2]
|
||||
/// ```
|
||||
/// > results in an length of 2
|
||||
///
|
||||
/// ```not_rust
|
||||
/// old: [0, 1, 9, 0, 2, 5]
|
||||
/// new: [0, 4]
|
||||
/// ```
|
||||
/// > results in an length of 1
|
||||
pub fn find_longest_prefix_contained_within<T>(old: &[Token<T>], new: &[Token<T>]) -> usize
|
||||
where
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
let max_possible = new.len().min(old.len());
|
||||
|
||||
for len in (1..=max_possible).rev() {
|
||||
let prefix = &new[..len];
|
||||
if old.windows(len).any(|window| window == prefix) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
0
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Tokenises both word lists and compares the computed length with
    /// `expected`.
    fn check(old: &[&str], new: &[&str], expected: usize) {
        let old: Vec<Token<String>> = old.iter().map(|word| Token::from(*word)).collect();
        let new: Vec<Token<String>> = new.iter().map(|word| Token::from(*word)).collect();

        assert_eq!(find_longest_prefix_contained_within(&old, &new), expected);
    }

    #[test]
    fn test_common_overlap() {
        check(&[""], &[""], 1);

        check(&["a", "b", "c"], &["b", "c", "a"], 2);

        check(&["a", "b", "c"], &["b", "c"], 2);

        check(&["a", "b", "c"], &["b"], 1);

        check(&["a", "b", "c", "b", "a"], &["b", "a"], 2);

        check(&["a", "a", "a"], &["a", "b", "c"], 1);

        check(&["a", "b", "c"], &["d", "e", "a"], 0);
    }
}
|
||||
|
|
@ -1,86 +0,0 @@
|
|||
use core::{cmp::Ordering, iter::Peekable};
|
||||
|
||||
/// Iterator that interleaves two iterators by comparing the keys of their
/// next items, as produced by [`MergeSorted::merge_sorted_by_key`].
pub struct MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    left: Peekable<L>,
    right: Peekable<R>,
    get_key: F,
}

impl<L, R, F, O> MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    /// Wraps both inputs so their next items can be inspected without being
    /// consumed.
    fn new(left: L, right: R, get_key: F) -> Self {
        let left = left.peekable();
        let right = right.peekable();

        Self {
            left,
            right,
            get_key,
        }
    }
}

impl<L, R, F, O> Iterator for MergeAscending<L, R, F, O>
where
    L: Iterator<Item = R::Item>,
    R: Iterator,
    F: Fn(&R::Item) -> O,
    O: PartialOrd,
{
    type Item = L::Item;

    /// Yields the pending item with the smaller key. The left item wins when
    /// the keys are equal or cannot be compared; once one side runs out the
    /// other side is drained.
    fn next(&mut self) -> Option<L::Item> {
        let take_right = match (self.left.peek(), self.right.peek()) {
            (None, None) => return None,
            (None, Some(_)) => true,
            (Some(_), None) => false,
            (Some(l), Some(r)) => {
                (self.get_key)(l).partial_cmp(&(self.get_key)(r)) == Some(Ordering::Greater)
            }
        };

        if take_right {
            self.right.next()
        } else {
            self.left.next()
        }
    }
}

/// Extension trait adding [`merge_sorted_by_key`](MergeSorted::merge_sorted_by_key)
/// to every iterator.
pub trait MergeSorted: Iterator {
    /// Merges `self` with `other`, at each step yielding whichever pending
    /// item has the smaller `get_key` value (items of `self` first on ties).
    fn merge_sorted_by_key<R, F, O>(self, other: R, get_key: F) -> MergeAscending<Self, R, F, O>
    where
        Self: Sized,
        R: Iterator<Item = Self::Item>,
        F: Fn(&Self::Item) -> O,
        O: PartialOrd,
    {
        MergeAscending::new(self, other, get_key)
    }
}

impl<T: ?Sized> MergeSorted for T where T: Iterator {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn test_merge_sorted_by_key() {
        let descending_left = [9, 7, 5, 3, 1];
        let descending_right = [7, 6, 5, 4, 3];

        // Negating each value makes the keys of the descending inputs ascend.
        let merged: Vec<i32> = descending_left
            .into_iter()
            .merge_sorted_by_key(descending_right.into_iter(), |value| -value)
            .collect();

        assert_eq!(merged, vec![9, 7, 7, 6, 5, 5, 4, 3, 3, 1]);
    }
}
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
/// One of the two sides, `Left` or `Right`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Side {
    Left,
    Right,
}

impl Display for Side {
    /// Writes the variant name (`Left` or `Right`) verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Side::Left => "Left",
            Side::Right => "Right",
        };

        f.write_str(label)
    }
}
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
use core::ops::Range;
|
||||
|
||||
/// Builds a new string front to back from an original string plus a series
/// of insertions and deletions. All positions are counted in `char`s, so the
/// builder can be used with multi-byte UTF-8 text.
#[derive(Debug, Clone)]
pub struct StringBuilder<'a> {
    original: &'a str,
    last_old_char_index: usize,
    buffer: String,
}

impl StringBuilder<'_> {
    /// Starts an empty result for `original`, reserving `original.len()`
    /// bytes up front.
    pub fn new(original: &str) -> StringBuilder<'_> {
        StringBuilder {
            buffer: String::with_capacity(original.len()),
            last_old_char_index: 0,
            original,
        }
    }

    /// Copies the original forward until the result is `from` chars long,
    /// then appends `text`.
    pub fn insert(&mut self, from: usize, text: &str) {
        self.copy_until(from);
        self.buffer += text;
    }

    /// Copies the original forward until the result is `range.start` chars
    /// long, then skips `range.len()` chars of the original.
    pub fn delete(&mut self, range: Range<usize>) {
        self.copy_until(range.start);
        self.last_old_char_index += range.len();
    }

    /// Appends not-yet-consumed chars of the original until the result holds
    /// `index` chars.
    fn copy_until(&mut self, index: usize) {
        let written = self.buffer.chars().count();
        debug_assert!(
            index >= written,
            "String builder only support building in order"
        );

        let jump = index - written;
        let pending = self
            .original
            .chars()
            .skip(self.last_old_char_index)
            .take(jump);

        self.buffer.extend(pending);
        self.last_old_char_index += jump;
    }

    /// Appends whatever is left of the original and returns the finished
    /// string.
    pub fn build(mut self) -> String {
        let remainder = self.original.chars().skip(self.last_old_char_index);
        self.buffer.extend(remainder);

        self.buffer
    }

    /// Returns the chars in `range` of the string that `build` would produce
    /// right now, without modifying the builder.
    #[allow(dead_code)]
    pub fn get_slice(&self, range: Range<usize>) -> String {
        let unread = self.original.chars().skip(self.last_old_char_index);
        let slice: String = self
            .buffer
            .chars()
            .chain(unread)
            .skip(range.start)
            .take(range.end - range.start)
            .collect();

        debug_assert_eq!(slice.chars().count(), range.len(), "Range out of bounds",);

        slice
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixes an insertion at the start, a deletion and an insertion at the
    /// end.
    #[test]
    fn test_string_builder() {
        let mut under_test = StringBuilder::new("aaa bbb ccc");

        under_test.insert(0, "ddd ");
        under_test.delete(4..8);
        under_test.insert(11, " eee");

        let built = under_test.build();
        assert_eq!(built, "ddd bbb ccc eee");
    }

    /// A single deletion in the middle keeps both ends of the original.
    #[test]
    fn test_string_builder2() {
        let mut under_test = StringBuilder::new("abcde");

        under_test.delete(1..4);

        let built = under_test.build();
        assert_eq!(built, "ae");
    }
}
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
use pretty_assertions::assert_eq;
|
||||
use reconcile::{CursorPosition, TextWithCursors};
|
||||
use serde::Deserialize;
|
||||
|
||||
/// `ExampleDocument` represents a test case for the reconciliation process.
|
||||
/// It contains a parent string, left and right strings with cursor positions,
|
||||
/// and the expected result after reconciliation.
|
||||
///
|
||||
/// '|' characters in the left, right, and expected strings are treated as
|
||||
/// cursor positions and are converted into `CursorPosition` objects.
|
||||
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub struct ExampleDocument {
|
||||
parent: String,
|
||||
left: String,
|
||||
right: String,
|
||||
expected: String,
|
||||
}
|
||||
|
||||
impl ExampleDocument {
|
||||
#[must_use]
|
||||
pub fn parent(&self) -> String { self.parent.clone() }
|
||||
|
||||
#[must_use]
|
||||
pub fn left(&self) -> TextWithCursors<'static> {
|
||||
ExampleDocument::string_to_text_with_cursors(&self.left)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn right(&self) -> TextWithCursors<'static> {
|
||||
ExampleDocument::string_to_text_with_cursors(&self.right)
|
||||
}
|
||||
|
||||
/// Asserts that the result string matches the expected string,
|
||||
/// including cursor positions.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the result string does not match the expected string, the program
|
||||
/// will panic.
|
||||
pub fn assert_eq(&self, result: &TextWithCursors<'static>) {
|
||||
let result_str = ExampleDocument::text_with_cursors_to_string(result);
|
||||
assert_eq!(
|
||||
self.expected, result_str,
|
||||
"Left (expected) isn't equal to right (actual). Actual: ```\n{result_str}```",
|
||||
);
|
||||
}
|
||||
|
||||
/// Asserts that the result string matches the expected string,
|
||||
/// ignoring cursor positions.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If the result string does not match the expected string, the program
|
||||
/// will panic.
|
||||
pub fn assert_eq_without_cursors(&self, result: &str) {
|
||||
let expected = ExampleDocument::string_to_text_with_cursors(&self.expected).text;
|
||||
assert_eq!(
|
||||
expected, result,
|
||||
"Left (expected) isn't equal to right (actual), Actual: ```\n{result}```",
|
||||
);
|
||||
}
|
||||
|
||||
fn text_with_cursors_to_string(text: &TextWithCursors<'_>) -> String {
|
||||
let mut result = text.text.clone().into_owned();
|
||||
for (i, cursor) in text.cursors.iter().enumerate() {
|
||||
assert!(
|
||||
cursor.char_index <= result.len(), // equals in case of insert at the end
|
||||
"Cursor index out of bounds: {} > {} when testing for '{result}'",
|
||||
cursor.char_index,
|
||||
result.len()
|
||||
);
|
||||
|
||||
result.insert(
|
||||
result
|
||||
.char_indices()
|
||||
.nth(cursor.char_index + i)
|
||||
.map_or_else(|| result.len(), |(byte_index, _)| byte_index), /* find the utf8 char index of the insert
|
||||
* in byte index */
|
||||
'|',
|
||||
);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn string_to_text_with_cursors(text: &str) -> TextWithCursors<'static> {
|
||||
let cursors = Self::parse_cursors(text);
|
||||
let text = text.replace('|', "");
|
||||
TextWithCursors::new_owned(text, cursors)
|
||||
}
|
||||
|
||||
fn parse_cursors(text: &str) -> Vec<CursorPosition> {
|
||||
let mut cursors = Vec::new();
|
||||
for (i, c) in text.chars().enumerate() {
|
||||
if c == '|' {
|
||||
cursors.push(CursorPosition {
|
||||
id: 0,
|
||||
char_index: i - cursors.len(),
|
||||
});
|
||||
}
|
||||
}
|
||||
cursors
|
||||
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
The `|` characters denote cursor positions which are stripped before the actual reconcile logic is run
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
# Both delete the same range
|
||||
parent: original_1 original_2 original_3 original_4 original_5
|
||||
left: original_1 original_5|
|
||||
right: "|original_1 original_5"
|
||||
expected: "|original_1 original_5|"
|
||||
|
||||
---
|
||||
# Both delete a range and one range contains the other
|
||||
parent: original_1 original_2 original_3 original_4 original_5
|
||||
left: original_1 original_5
|
||||
right: original_1 original_4 original_5
|
||||
expected: original_1 original_5
|
||||
|
||||
---
|
||||
# Deleting overlapping ranges
|
||||
parent: original_1 original_2 original_3 original_4 original_5
|
||||
left: original_1 original_4| original_5
|
||||
right: original_1 original_2| original_5
|
||||
expected: original_1|| original_5
|
||||
|
||||
---
|
||||
parent: long text with one big delete and many small
|
||||
left: long small
|
||||
right: long with big and small
|
||||
expected: long small
|
||||
|
||||
---
|
||||
parent: long text where the cursor has to be clamped after delete
|
||||
left: long text where the cursor has to be clamped after delete|
|
||||
right: long text where the cursor
|
||||
expected: long text where the cursor|
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
# One deleted a large range, the other deleted subranges and inserted as well
|
||||
parent: original_1 original_2 original_3 original_4 original_5
|
||||
left: original_1 original_5
|
||||
right: original_1 edit_1 original_3 edit_2 original_5
|
||||
expected: original_1 edit_1 edit_2 original_5
|
||||
|
||||
---
|
||||
# One deleted a large range, the other inserted and deleted a partially overlapping range
|
||||
parent: original_1 original_2 original_3 original_4 original_5
|
||||
left: original_1 original_5
|
||||
right: original_1 edit_1 original_3 edit_2
|
||||
expected: original_1 edit_1 edit_2
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
# Both inserted the same prefix; this should get deduplicated
|
||||
parent: "hi "
|
||||
left: "hi there "
|
||||
right: "hi there my friend "
|
||||
expected: "hi there my friend "
|
||||
|
||||
---
|
||||
# The prefix of the 2nd appears on the 1st so it shouldn't get duplicated
|
||||
parent: "hi "
|
||||
left: "hi there you "
|
||||
right: "hi there my friend "
|
||||
expected: "hi there my friend you "
|
||||
|
||||
---
|
||||
parent: a
|
||||
left: a b c
|
||||
right: a b c d
|
||||
expected: a b c d
|
||||
|
||||
---
|
||||
parent: a
|
||||
left: abc
|
||||
right: abcd
|
||||
expected: abcabcd
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
parent: Hello!
|
||||
left: |
|
||||
Hello there!
|
||||
|
||||
How are you?
|
||||
|
||||
right: |
|
||||
Hello there!
|
||||
|
||||
Best,
|
||||
Andras
|
||||
|
||||
expected: |
|
||||
Hello there!
|
||||
|
||||
Best,
|
||||
Andras
|
||||
|
||||
|
||||
How are you?
|
||||
|
||||
---
|
||||
parent: |
|
||||
- my list
|
||||
- 2nd item
|
||||
- 3rd item
|
||||
|
||||
left: |
|
||||
- my list
|
||||
- 2nd item
|
||||
- nested list
|
||||
- very nested list
|
||||
- 3rd item
|
||||
|
||||
right: |
|
||||
- my list
|
||||
- nested list
|
||||
- 2nd item
|
||||
- 3rd item
|
||||
- another nested list
|
||||
|
||||
expected: |
|
||||
- my list
|
||||
- nested list
|
||||
- 2nd item
|
||||
- nested list
|
||||
- very nested list
|
||||
- 3rd item
|
||||
- another nested list
|
||||
|
||||
---
|
||||
parent: |
|
||||
a
|
||||
a
|
||||
left: |
|
||||
a|
|
||||
a
|
||||
right: |
|
||||
a|
|
||||
a
|
||||
expected: |
|
||||
a||
|
||||
a
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
# Both replaced one token but the tokens are different
|
||||
parent: original_1 original_2 original_3
|
||||
left: original_1 edit_1| original_3
|
||||
right: original_1 original_2| edit_2
|
||||
expected: original_1 edit_1|| edit_2
|
||||
|
||||
---
|
||||
# Both replace the same token with the same value
|
||||
parent: original_1 original_2 original_3
|
||||
left: original_1 edit_1| original_3
|
||||
right: original_1 edit_1 original_3|
|
||||
expected: original_1 edit_1| original_3|
|
||||
|
||||
---
|
||||
# Both replace the same token with different value
|
||||
parent: original_1 original_2 original_3
|
||||
left: original_1 edit_1| original_3
|
||||
right: original_1 conflicting_edit_1| original_3
|
||||
expected: original_1 conflicting_edit_1| edit_1| original_3
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
parent: Meeting at 2pm in 会议室
|
||||
left: Meeting at |3pm in 会议室
|
||||
right: Team meeting at 2pm in conference room|
|
||||
expected: Team meeting at |3pm in conference room|
|
||||
|
||||
---
|
||||
parent: " "
|
||||
left: "it’|s utf-8!"
|
||||
right: " "
|
||||
expected: "it’|s utf-8!"
|
||||
|
|
@ -1,130 +0,0 @@
|
|||
parent: You're Annual Savings Statement is available in our online portal
|
||||
left: Your| annual record is available in our online portal|
|
||||
right: You're Annual Savings information| is available online
|
||||
expected: Your| annual record information| is available online|
|
||||
|
||||
---
|
||||
parent: Party A shall pay Party B
|
||||
left: Party C shall pay Party B
|
||||
right: Party A shall receive from Party B
|
||||
expected: Party C shall receive from Party B
|
||||
|
||||
---
|
||||
parent:
|
||||
left: hi my friend|
|
||||
right: hi there|
|
||||
expected: hi my friend| there|
|
||||
|
||||
---
|
||||
parent: ""
|
||||
left: ""
|
||||
right: ""
|
||||
expected: ""
|
||||
|
||||
---
|
||||
parent: ""
|
||||
left: "|"
|
||||
right: "|"
|
||||
expected: "||"
|
||||
|
||||
---
|
||||
parent: Buy milk and eggs
|
||||
left: Buy organic milk| and eggs|
|
||||
right: Buy milk and eggs| and bread
|
||||
expected: Buy organic milk| and eggs|| and bread
|
||||
|
||||
---
|
||||
parent: Send the report to the team
|
||||
left: Send the |detailed report to the |entire |team
|
||||
right: Send the |quarterly |detailed report to the team
|
||||
expected: Send the |detailed |quarterly |detailed report to the |entire |team
|
||||
|
||||
---
|
||||
parent: Ready, Set go
|
||||
left: Ready! Set go|
|
||||
right: Ready, Set, go!|
|
||||
expected: Ready! Set, go!||
|
||||
|
||||
---
|
||||
parent: "Total: $100"
|
||||
left: "Total: |$150"
|
||||
right: "Total: |€100"
|
||||
expected: "Total: |$150 |€100"
|
||||
|
||||
---
|
||||
parent: Start middle end
|
||||
left: Start [important] middle end|
|
||||
right: Start middle [critical] end|
|
||||
expected: Start [important] middle [critical] end||
|
||||
|
||||
---
|
||||
parent: marketplace
|
||||
left: market| place
|
||||
right: market|space
|
||||
expected: market| placemarket|space
|
||||
|
||||
---
|
||||
parent: A B C D
|
||||
left: A X B D|
|
||||
right: A B Y|
|
||||
expected: A X B |Y|
|
||||
|
||||
---
|
||||
parent: Please submit your assignment by Friday
|
||||
left: Please submit your |completed |assignment by Friday
|
||||
right: Please submit your assignment |online |by Friday
|
||||
expected: Please submit your |completed |assignment |online |by Friday
|
||||
|
||||
---
|
||||
parent: "a b "
|
||||
left: "c d "
|
||||
right: "a b c d "
|
||||
expected: "c d c d "
|
||||
|
||||
---
|
||||
parent: a b c d e
|
||||
left: a e|
|
||||
right: a c e|
|
||||
expected: a e||
|
||||
|
||||
---
|
||||
parent: a 0 1 2 b
|
||||
left: a 0 1| 2 b
|
||||
right: a b|
|
||||
expected: a| b|
|
||||
|
||||
---
|
||||
parent: a 0 1 2 b
|
||||
left: "|a b"
|
||||
right: "|a E 1 F b"
|
||||
expected: "||a E F b"
|
||||
|
||||
---
|
||||
parent: a this one delete b
|
||||
left: a b|
|
||||
right: a my one change b|
|
||||
expected: a my change b||
|
||||
|
||||
---
|
||||
parent: this stays, this is one big delete, don't touch this
|
||||
left: this stays, don't touch this|
|
||||
right: this stays, my one change, don't touch this|
|
||||
expected: this stays, my change, don't touch this||
|
||||
|
||||
---
|
||||
parent: 1 2 3 4 5 6
|
||||
left: 1| 6
|
||||
right: 1 2 4|
|
||||
expected: 1||
|
||||
|
||||
---
|
||||
parent: hello world
|
||||
left: hi, world
|
||||
right: hello my friend!
|
||||
expected: hi, my friend!
|
||||
|
||||
---
|
||||
parent: a a
|
||||
left: a
|
||||
right: a
|
||||
expected: a
|
||||
|
|
@ -1,742 +0,0 @@
|
|||
# Reserved Strings
|
||||
#
|
||||
# Strings which may be used elsewhere in code
|
||||
|
||||
undefined
|
||||
undef
|
||||
null
|
||||
NULL
|
||||
(null)
|
||||
nil
|
||||
NIL
|
||||
true
|
||||
false
|
||||
True
|
||||
False
|
||||
TRUE
|
||||
FALSE
|
||||
None
|
||||
hasOwnProperty
|
||||
then
|
||||
constructor
|
||||
\
|
||||
\\
|
||||
|
||||
# Numeric Strings
|
||||
#
|
||||
# Strings which can be interpreted as numeric
|
||||
|
||||
0
|
||||
1
|
||||
1.00
|
||||
$1.00
|
||||
1/2
|
||||
1E2
|
||||
1E02
|
||||
1E+02
|
||||
-1
|
||||
-1.00
|
||||
-$1.00
|
||||
-1/2
|
||||
-1E2
|
||||
-1E02
|
||||
-1E+02
|
||||
1/0
|
||||
0/0
|
||||
-2147483648/-1
|
||||
-9223372036854775808/-1
|
||||
-0
|
||||
-0.0
|
||||
+0
|
||||
+0.0
|
||||
0.00
|
||||
0..0
|
||||
.
|
||||
0.0.0
|
||||
0,00
|
||||
0,,0
|
||||
,
|
||||
0,0,0
|
||||
0.0/0
|
||||
1.0/0.0
|
||||
0.0/0.0
|
||||
1,0/0,0
|
||||
0,0/0,0
|
||||
--1
|
||||
-
|
||||
-.
|
||||
-,
|
||||
999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
|
||||
NaN
|
||||
Infinity
|
||||
-Infinity
|
||||
INF
|
||||
1#INF
|
||||
-1#IND
|
||||
1#QNAN
|
||||
1#SNAN
|
||||
1#IND
|
||||
0x0
|
||||
0xffffffff
|
||||
0xffffffffffffffff
|
||||
0xabad1dea
|
||||
123456789012345678901234567890123456789
|
||||
1,000.00
|
||||
1 000.00
|
||||
1'000.00
|
||||
1,000,000.00
|
||||
1 000 000.00
|
||||
1'000'000.00
|
||||
1.000,00
|
||||
1 000,00
|
||||
1'000,00
|
||||
1.000.000,00
|
||||
1 000 000,00
|
||||
1'000'000,00
|
||||
01000
|
||||
08
|
||||
09
|
||||
2.2250738585072011e-308
|
||||
|
||||
# Special Characters
|
||||
#
|
||||
# ASCII punctuation. All of these characters may need to be escaped in some
|
||||
# contexts. Divided into three groups based on (US-layout) keyboard position.
|
||||
|
||||
,./;'[]\-=
|
||||
<>?:"{}|_+
|
||||
!@#$%^&*()`~
|
||||
|
||||
# Non-whitespace C0 controls: U+0001 through U+0008, U+000E through U+001F,
|
||||
# and U+007F (DEL)
|
||||
# Often forbidden to appear in various text-based file formats (e.g. XML),
|
||||
# or reused for internal delimiters on the theory that they should never
|
||||
# appear in input.
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
# Non-whitespace C1 controls: U+0080 through U+0084 and U+0086 through U+009F.
|
||||
# Commonly misinterpreted as additional graphic characters.
|
||||
# The next line may appear to be blank, mojibake, or dingbats in some viewers.
|
||||
|
||||
|
||||
# Whitespace: all of the characters with category Zs, Zl, or Zp (in Unicode
|
||||
# version 8.0.0), plus U+0009 (HT), U+000B (VT), U+000C (FF), U+0085 (NEL),
|
||||
# and U+200B (ZERO WIDTH SPACE), which are in the C categories but are often
|
||||
# treated as whitespace in some contexts.
|
||||
# This file unfortunately cannot express strings containing
|
||||
# U+0000, U+000A, or U+000D (NUL, LF, CR).
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
# The next line may be flagged for "trailing whitespace" in some viewers.
|
||||
|
||||
|
||||
# Unicode additional control characters: all of the characters with
|
||||
# general category Cf (in Unicode 8.0.0).
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
# "Byte order marks", U+FEFF and U+FFFE, each on its own line.
|
||||
# The next two lines may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
|
||||
# Unicode Symbols
|
||||
#
|
||||
# Strings which contain common unicode symbols (e.g. smart quotes)
|
||||
|
||||
Ω≈ç√∫˜µ≤≥÷
|
||||
åß∂ƒ©˙∆˚¬…æ
|
||||
œ∑´®†¥¨ˆøπ“‘
|
||||
¡™£¢∞§¶•ªº–≠
|
||||
¸˛Ç◊ı˜Â¯˘¿
|
||||
ÅÍÎÏ˝ÓÔÒÚÆ☃
|
||||
Œ„´‰ˇÁ¨ˆØ∏”’
|
||||
`⁄€‹›fifl‡°·‚—±
|
||||
⅛⅜⅝⅞
|
||||
ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя
|
||||
٠١٢٣٤٥٦٧٨٩
|
||||
|
||||
# Unicode Subscript/Superscript/Accents
|
||||
#
|
||||
# Strings which contain unicode subscripts/superscripts; can cause rendering issues
|
||||
|
||||
⁰⁴⁵
|
||||
₀₁₂
|
||||
⁰⁴⁵₀₁₂
|
||||
ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็
|
||||
|
||||
# Quotation Marks
|
||||
#
|
||||
# Strings which contain misplaced quotation marks; can cause encoding errors
|
||||
|
||||
'
|
||||
"
|
||||
''
|
||||
""
|
||||
'"'
|
||||
"''''"'"
|
||||
"'"'"''''"
|
||||
<foo val=“bar” />
|
||||
<foo val=“bar” />
|
||||
<foo val=”bar“ />
|
||||
<foo val=`bar' />
|
||||
|
||||
# Two-Byte Characters
|
||||
#
|
||||
# Strings which contain two-byte characters: can cause rendering issues or character-length issues
|
||||
|
||||
田中さんにあげて下さい
|
||||
パーティーへ行かないか
|
||||
和製漢語
|
||||
部落格
|
||||
사회과학원 어학연구소
|
||||
찦차를 타고 온 펲시맨과 쑛다리 똠방각하
|
||||
社會科學院語學研究所
|
||||
울란바토르
|
||||
𠜎𠜱𠝹𠱓𠱸𠲖𠳏
|
||||
|
||||
# Strings which contain two-byte letters: can cause issues with naïve UTF-16 capitalizers which think that 16 bits == 1 character
|
||||
|
||||
𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆
|
||||
|
||||
# Special Unicode Characters Union
|
||||
#
|
||||
# A super string recommended by VMware Inc. Globalization Team: can effectively cause rendering issues or character-length issues to validate product globalization readiness.
|
||||
#
|
||||
# 表 CJK_UNIFIED_IDEOGRAPHS (U+8868)
|
||||
# ポ KATAKANA LETTER PO (U+30DD)
|
||||
# あ HIRAGANA LETTER A (U+3042)
|
||||
# A LATIN CAPITAL LETTER A (U+0041)
|
||||
# 鷗 CJK_UNIFIED_IDEOGRAPHS (U+9DD7)
|
||||
# Œ LATIN CAPITAL LIGATURE OE (U+0152)
|
||||
# é LATIN SMALL LETTER E WITH ACUTE (U+00E9)
|
||||
# B FULLWIDTH LATIN CAPITAL LETTER B (U+FF22)
|
||||
# 逍 CJK_UNIFIED_IDEOGRAPHS (U+900D)
|
||||
# Ü LATIN CAPITAL LETTER U WITH DIAERESIS (U+00DC)
|
||||
# ß LATIN SMALL LETTER SHARP S (U+00DF)
|
||||
# ª FEMININE ORDINAL INDICATOR (U+00AA)
|
||||
# ą LATIN SMALL LETTER A WITH OGONEK (U+0105)
|
||||
# ñ LATIN SMALL LETTER N WITH TILDE (U+00F1)
|
||||
# 丂 CJK_UNIFIED_IDEOGRAPHS (U+4E02)
|
||||
# 㐀 CJK Ideograph Extension A, First (U+3400)
|
||||
# 𠀀 CJK Ideograph Extension B, First (U+20000)
|
||||
|
||||
表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀
|
||||
|
||||
# Changing length when lowercased
|
||||
#
|
||||
# Characters which increase in length (2 to 3 bytes) when lowercased
|
||||
# Credit: https://twitter.com/jifa/status/625776454479970304
|
||||
|
||||
Ⱥ
|
||||
Ⱦ
|
||||
|
||||
# Japanese Emoticons
|
||||
#
|
||||
# Strings which consist of Japanese-style emoticons that are popular on the web
|
||||
|
||||
ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ
|
||||
(。◕ ∀ ◕。)
|
||||
`ィ(´∀`∩
|
||||
__ロ(,_,*)
|
||||
・( ̄∀ ̄)・:*:
|
||||
゚・✿ヾ╲(。◕‿◕。)╱✿・゚
|
||||
,。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’
|
||||
(╯°□°)╯︵ ┻━┻)
|
||||
(ノಥ益ಥ)ノ ┻━┻
|
||||
┬─┬ノ( º _ ºノ)
|
||||
( ͡° ͜ʖ ͡°)
|
||||
¯\_(ツ)_/¯
|
||||
|
||||
# Emoji
|
||||
#
|
||||
# Strings which contain Emoji; should be the same behavior as two-byte characters, but not always
|
||||
|
||||
😍
|
||||
👩🏽
|
||||
👨🦰 👨🏿🦰 👨🦱 👨🏿🦱 🦹🏿♂️
|
||||
👾 🙇 💁 🙅 🙆 🙋 🙎 🙍
|
||||
🐵 🙈 🙉 🙊
|
||||
❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
|
||||
✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿
|
||||
👨👩👦 👨👩👧👦 👨👨👦 👩👩👧 👨👦 👨👧👦 👩👦 👩👧👦
|
||||
🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧
|
||||
0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟
|
||||
|
||||
# Regional Indicator Symbols
|
||||
#
|
||||
# Regional Indicator Symbols can be displayed differently across
|
||||
# fonts, and have a number of special behaviors
|
||||
|
||||
🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸
|
||||
🇺🇸🇷🇺🇸🇦🇫🇦🇲
|
||||
🇺🇸🇷🇺🇸🇦
|
||||
|
||||
# Unicode Numbers
|
||||
#
|
||||
# Strings which contain unicode numbers; if the code is localized, it should see the input as numeric
|
||||
|
||||
123
|
||||
١٢٣
|
||||
|
||||
# Right-To-Left Strings
|
||||
#
|
||||
# Strings which contain text that should be rendered RTL if possible (e.g. Arabic, Hebrew)
|
||||
|
||||
ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو.
|
||||
בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ
|
||||
הָיְתָהtestالصفحات التّحول
|
||||
﷽
|
||||
ﷺ
|
||||
مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،
|
||||
الكل في المجمو عة (5)
|
||||
|
||||
# Ogham Text
|
||||
#
|
||||
# The only unicode alphabet to use a space which isn't empty but should still act like a space.
|
||||
|
||||
᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜
|
||||
᚛ ᚜
|
||||
|
||||
# Trick Unicode
|
||||
#
|
||||
# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf)
|
||||
|
||||
test
|
||||
test
|
||||
test
|
||||
testtest
|
||||
test
|
||||
|
||||
# Zalgo Text
|
||||
#
|
||||
# Strings which contain "corrupted" text. The corruption will not appear in non-HTML text, however. (via http://www.eeemo.net)
|
||||
|
||||
Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣
|
||||
̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰
|
||||
̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟
|
||||
̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕
|
||||
Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮
|
||||
|
||||
# Unicode Upsidedown
|
||||
#
|
||||
# Strings which contain unicode with an "upsidedown" effect (via http://www.upsidedowntext.com)
|
||||
|
||||
˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥
|
||||
00˙Ɩ$-
|
||||
|
||||
# Unicode font
|
||||
#
|
||||
# Strings which contain bold/italic/etc. versions of normal characters
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠
|
||||
𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌
|
||||
𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈
|
||||
𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰
|
||||
𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘
|
||||
𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐
|
||||
⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢
|
||||
|
||||
# Script Injection
|
||||
#
|
||||
# Strings which attempt to invoke a benign script injection; shows vulnerability to XSS
|
||||
|
||||
<script>alert(0)</script>
|
||||
<script>alert('1');</script>
|
||||
<img src=x onerror=alert(2) />
|
||||
<svg><script>123<1>alert(3)</script>
|
||||
"><script>alert(4)</script>
|
||||
'><script>alert(5)</script>
|
||||
><script>alert(6)</script>
|
||||
</script><script>alert(7)</script>
|
||||
< / script >< script >alert(8)< / script >
|
||||
onfocus=JaVaSCript:alert(9) autofocus
|
||||
" onfocus=JaVaSCript:alert(10) autofocus
|
||||
' onfocus=JaVaSCript:alert(11) autofocus
|
||||
<script>alert(12)</script>
|
||||
<sc<script>ript>alert(13)</sc</script>ript>
|
||||
--><script>alert(14)</script>
|
||||
";alert(15);t="
|
||||
';alert(16);t='
|
||||
JavaSCript:alert(17)
|
||||
;alert(18);
|
||||
src=JaVaSCript:prompt(19)
|
||||
"><script>alert(20);</script x="
|
||||
'><script>alert(21);</script x='
|
||||
><script>alert(22);</script x=
|
||||
" autofocus onkeyup="javascript:alert(23)
|
||||
' autofocus onkeyup='javascript:alert(24)
|
||||
<script\x20type="text/javascript">javascript:alert(25);</script>
|
||||
<script\x3Etype="text/javascript">javascript:alert(26);</script>
|
||||
<script\x0Dtype="text/javascript">javascript:alert(27);</script>
|
||||
<script\x09type="text/javascript">javascript:alert(28);</script>
|
||||
<script\x0Ctype="text/javascript">javascript:alert(29);</script>
|
||||
<script\x2Ftype="text/javascript">javascript:alert(30);</script>
|
||||
<script\x0Atype="text/javascript">javascript:alert(31);</script>
|
||||
'`"><\x3Cscript>javascript:alert(32)</script>
|
||||
'`"><\x00script>javascript:alert(33)</script>
|
||||
ABC<div style="x\x3Aexpression(javascript:alert(34)">DEF
|
||||
ABC<div style="x:expression\x5C(javascript:alert(35)">DEF
|
||||
ABC<div style="x:expression\x00(javascript:alert(36)">DEF
|
||||
ABC<div style="x:exp\x00ression(javascript:alert(37)">DEF
|
||||
ABC<div style="x:exp\x5Cression(javascript:alert(38)">DEF
|
||||
ABC<div style="x:\x0Aexpression(javascript:alert(39)">DEF
|
||||
ABC<div style="x:\x09expression(javascript:alert(40)">DEF
|
||||
ABC<div style="x:\xE3\x80\x80expression(javascript:alert(41)">DEF
|
||||
ABC<div style="x:\xE2\x80\x84expression(javascript:alert(42)">DEF
|
||||
ABC<div style="x:\xC2\xA0expression(javascript:alert(43)">DEF
|
||||
ABC<div style="x:\xE2\x80\x80expression(javascript:alert(44)">DEF
|
||||
ABC<div style="x:\xE2\x80\x8Aexpression(javascript:alert(45)">DEF
|
||||
ABC<div style="x:\x0Dexpression(javascript:alert(46)">DEF
|
||||
ABC<div style="x:\x0Cexpression(javascript:alert(47)">DEF
|
||||
ABC<div style="x:\xE2\x80\x87expression(javascript:alert(48)">DEF
|
||||
ABC<div style="x:\xEF\xBB\xBFexpression(javascript:alert(49)">DEF
|
||||
ABC<div style="x:\x20expression(javascript:alert(50)">DEF
|
||||
ABC<div style="x:\xE2\x80\x88expression(javascript:alert(51)">DEF
|
||||
ABC<div style="x:\x00expression(javascript:alert(52)">DEF
|
||||
ABC<div style="x:\xE2\x80\x8Bexpression(javascript:alert(53)">DEF
|
||||
ABC<div style="x:\xE2\x80\x86expression(javascript:alert(54)">DEF
|
||||
ABC<div style="x:\xE2\x80\x85expression(javascript:alert(55)">DEF
|
||||
ABC<div style="x:\xE2\x80\x82expression(javascript:alert(56)">DEF
|
||||
ABC<div style="x:\x0Bexpression(javascript:alert(57)">DEF
|
||||
ABC<div style="x:\xE2\x80\x81expression(javascript:alert(58)">DEF
|
||||
ABC<div style="x:\xE2\x80\x83expression(javascript:alert(59)">DEF
|
||||
ABC<div style="x:\xE2\x80\x89expression(javascript:alert(60)">DEF
|
||||
<a href="\x0Bjavascript:javascript:alert(61)" id="fuzzelement1">test</a>
|
||||
<a href="\x0Fjavascript:javascript:alert(62)" id="fuzzelement1">test</a>
|
||||
<a href="\xC2\xA0javascript:javascript:alert(63)" id="fuzzelement1">test</a>
|
||||
<a href="\x05javascript:javascript:alert(64)" id="fuzzelement1">test</a>
|
||||
<a href="\xE1\xA0\x8Ejavascript:javascript:alert(65)" id="fuzzelement1">test</a>
|
||||
<a href="\x18javascript:javascript:alert(66)" id="fuzzelement1">test</a>
|
||||
<a href="\x11javascript:javascript:alert(67)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x88javascript:javascript:alert(68)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x89javascript:javascript:alert(69)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x80javascript:javascript:alert(70)" id="fuzzelement1">test</a>
|
||||
<a href="\x17javascript:javascript:alert(71)" id="fuzzelement1">test</a>
|
||||
<a href="\x03javascript:javascript:alert(72)" id="fuzzelement1">test</a>
|
||||
<a href="\x0Ejavascript:javascript:alert(73)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Ajavascript:javascript:alert(74)" id="fuzzelement1">test</a>
|
||||
<a href="\x00javascript:javascript:alert(75)" id="fuzzelement1">test</a>
|
||||
<a href="\x10javascript:javascript:alert(76)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x82javascript:javascript:alert(77)" id="fuzzelement1">test</a>
|
||||
<a href="\x20javascript:javascript:alert(78)" id="fuzzelement1">test</a>
|
||||
<a href="\x13javascript:javascript:alert(79)" id="fuzzelement1">test</a>
|
||||
<a href="\x09javascript:javascript:alert(80)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x8Ajavascript:javascript:alert(81)" id="fuzzelement1">test</a>
|
||||
<a href="\x14javascript:javascript:alert(82)" id="fuzzelement1">test</a>
|
||||
<a href="\x19javascript:javascript:alert(83)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\xAFjavascript:javascript:alert(84)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Fjavascript:javascript:alert(85)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x81javascript:javascript:alert(86)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Djavascript:javascript:alert(87)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x87javascript:javascript:alert(88)" id="fuzzelement1">test</a>
|
||||
<a href="\x07javascript:javascript:alert(89)" id="fuzzelement1">test</a>
|
||||
<a href="\xE1\x9A\x80javascript:javascript:alert(90)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x83javascript:javascript:alert(91)" id="fuzzelement1">test</a>
|
||||
<a href="\x04javascript:javascript:alert(92)" id="fuzzelement1">test</a>
|
||||
<a href="\x01javascript:javascript:alert(93)" id="fuzzelement1">test</a>
|
||||
<a href="\x08javascript:javascript:alert(94)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x84javascript:javascript:alert(95)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x86javascript:javascript:alert(96)" id="fuzzelement1">test</a>
|
||||
<a href="\xE3\x80\x80javascript:javascript:alert(97)" id="fuzzelement1">test</a>
|
||||
<a href="\x12javascript:javascript:alert(98)" id="fuzzelement1">test</a>
|
||||
<a href="\x0Djavascript:javascript:alert(99)" id="fuzzelement1">test</a>
|
||||
<a href="\x0Ajavascript:javascript:alert(100)" id="fuzzelement1">test</a>
|
||||
<a href="\x0Cjavascript:javascript:alert(101)" id="fuzzelement1">test</a>
|
||||
<a href="\x15javascript:javascript:alert(102)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\xA8javascript:javascript:alert(103)" id="fuzzelement1">test</a>
|
||||
<a href="\x16javascript:javascript:alert(104)" id="fuzzelement1">test</a>
|
||||
<a href="\x02javascript:javascript:alert(105)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Bjavascript:javascript:alert(106)" id="fuzzelement1">test</a>
|
||||
<a href="\x06javascript:javascript:alert(107)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\xA9javascript:javascript:alert(108)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x80\x85javascript:javascript:alert(109)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Ejavascript:javascript:alert(110)" id="fuzzelement1">test</a>
|
||||
<a href="\xE2\x81\x9Fjavascript:javascript:alert(111)" id="fuzzelement1">test</a>
|
||||
<a href="\x1Cjavascript:javascript:alert(112)" id="fuzzelement1">test</a>
|
||||
<a href="javascript\x00:javascript:alert(113)" id="fuzzelement1">test</a>
|
||||
<a href="javascript\x3A:javascript:alert(114)" id="fuzzelement1">test</a>
|
||||
<a href="javascript\x09:javascript:alert(115)" id="fuzzelement1">test</a>
|
||||
<a href="javascript\x0D:javascript:alert(116)" id="fuzzelement1">test</a>
|
||||
<a href="javascript\x0A:javascript:alert(117)" id="fuzzelement1">test</a>
|
||||
`"'><img src=xxx:x \x0Aonerror=javascript:alert(118)>
|
||||
`"'><img src=xxx:x \x22onerror=javascript:alert(119)>
|
||||
`"'><img src=xxx:x \x0Bonerror=javascript:alert(120)>
|
||||
`"'><img src=xxx:x \x0Donerror=javascript:alert(121)>
|
||||
`"'><img src=xxx:x \x2Fonerror=javascript:alert(122)>
|
||||
`"'><img src=xxx:x \x09onerror=javascript:alert(123)>
|
||||
`"'><img src=xxx:x \x0Conerror=javascript:alert(124)>
|
||||
`"'><img src=xxx:x \x00onerror=javascript:alert(125)>
|
||||
`"'><img src=xxx:x \x27onerror=javascript:alert(126)>
|
||||
`"'><img src=xxx:x \x20onerror=javascript:alert(127)>
|
||||
"`'><script>\x3Bjavascript:alert(128)</script>
|
||||
"`'><script>\x0Djavascript:alert(129)</script>
|
||||
"`'><script>\xEF\xBB\xBFjavascript:alert(130)</script>
|
||||
"`'><script>\xE2\x80\x81javascript:alert(131)</script>
|
||||
"`'><script>\xE2\x80\x84javascript:alert(132)</script>
|
||||
"`'><script>\xE3\x80\x80javascript:alert(133)</script>
|
||||
"`'><script>\x09javascript:alert(134)</script>
|
||||
"`'><script>\xE2\x80\x89javascript:alert(135)</script>
|
||||
"`'><script>\xE2\x80\x85javascript:alert(136)</script>
|
||||
"`'><script>\xE2\x80\x88javascript:alert(137)</script>
|
||||
"`'><script>\x00javascript:alert(138)</script>
|
||||
"`'><script>\xE2\x80\xA8javascript:alert(139)</script>
|
||||
"`'><script>\xE2\x80\x8Ajavascript:alert(140)</script>
|
||||
"`'><script>\xE1\x9A\x80javascript:alert(141)</script>
|
||||
"`'><script>\x0Cjavascript:alert(142)</script>
|
||||
"`'><script>\x2Bjavascript:alert(143)</script>
|
||||
"`'><script>\xF0\x90\x96\x9Ajavascript:alert(144)</script>
|
||||
"`'><script>-javascript:alert(145)</script>
|
||||
"`'><script>\x0Ajavascript:alert(146)</script>
|
||||
"`'><script>\xE2\x80\xAFjavascript:alert(147)</script>
|
||||
"`'><script>\x7Ejavascript:alert(148)</script>
|
||||
"`'><script>\xE2\x80\x87javascript:alert(149)</script>
|
||||
"`'><script>\xE2\x81\x9Fjavascript:alert(150)</script>
|
||||
"`'><script>\xE2\x80\xA9javascript:alert(151)</script>
|
||||
"`'><script>\xC2\x85javascript:alert(152)</script>
|
||||
"`'><script>\xEF\xBF\xAEjavascript:alert(153)</script>
|
||||
"`'><script>\xE2\x80\x83javascript:alert(154)</script>
|
||||
"`'><script>\xE2\x80\x8Bjavascript:alert(155)</script>
|
||||
"`'><script>\xEF\xBF\xBEjavascript:alert(156)</script>
|
||||
"`'><script>\xE2\x80\x80javascript:alert(157)</script>
|
||||
"`'><script>\x21javascript:alert(158)</script>
|
||||
"`'><script>\xE2\x80\x82javascript:alert(159)</script>
|
||||
"`'><script>\xE2\x80\x86javascript:alert(160)</script>
|
||||
"`'><script>\xE1\xA0\x8Ejavascript:alert(161)</script>
|
||||
"`'><script>\x0Bjavascript:alert(162)</script>
|
||||
"`'><script>\x20javascript:alert(163)</script>
|
||||
"`'><script>\xC2\xA0javascript:alert(164)</script>
|
||||
<img \x00src=x onerror="alert(165)">
|
||||
<img \x47src=x onerror="javascript:alert(166)">
|
||||
<img \x11src=x onerror="javascript:alert(167)">
|
||||
<img \x12src=x onerror="javascript:alert(168)">
|
||||
<img\x47src=x onerror="javascript:alert(169)">
|
||||
<img\x10src=x onerror="javascript:alert(170)">
|
||||
<img\x13src=x onerror="javascript:alert(171)">
|
||||
<img\x32src=x onerror="javascript:alert(172)">
|
||||
<img\x47src=x onerror="javascript:alert(173)">
|
||||
<img\x11src=x onerror="javascript:alert(174)">
|
||||
<img \x47src=x onerror="javascript:alert(175)">
|
||||
<img \x34src=x onerror="javascript:alert(176)">
|
||||
<img \x39src=x onerror="javascript:alert(177)">
|
||||
<img \x00src=x onerror="javascript:alert(178)">
|
||||
<img src\x09=x onerror="javascript:alert(179)">
|
||||
<img src\x10=x onerror="javascript:alert(180)">
|
||||
<img src\x13=x onerror="javascript:alert(181)">
|
||||
<img src\x32=x onerror="javascript:alert(182)">
|
||||
<img src\x12=x onerror="javascript:alert(183)">
|
||||
<img src\x11=x onerror="javascript:alert(184)">
|
||||
<img src\x00=x onerror="javascript:alert(185)">
|
||||
<img src\x47=x onerror="javascript:alert(186)">
|
||||
<img src=x\x09onerror="javascript:alert(187)">
|
||||
<img src=x\x10onerror="javascript:alert(188)">
|
||||
<img src=x\x11onerror="javascript:alert(189)">
|
||||
<img src=x\x12onerror="javascript:alert(190)">
|
||||
<img src=x\x13onerror="javascript:alert(191)">
|
||||
<img[a][b][c]src[d]=x[e]onerror=[f]"alert(192)">
|
||||
<img src=x onerror=\x09"javascript:alert(193)">
|
||||
<img src=x onerror=\x10"javascript:alert(194)">
|
||||
<img src=x onerror=\x11"javascript:alert(195)">
|
||||
<img src=x onerror=\x12"javascript:alert(196)">
|
||||
<img src=x onerror=\x32"javascript:alert(197)">
|
||||
<img src=x onerror=\x00"javascript:alert(198)">
|
||||
<a href=javascript:javascript:alert(199)>XXX</a>
|
||||
<img src="x` `<script>javascript:alert(200)</script>"` `>
|
||||
<img src onerror /" '"= alt=javascript:alert(201)//">
|
||||
<title onpropertychange=javascript:alert(202)></title><title title=>
|
||||
<a href=http://foo.bar/#x=`y></a><img alt="`><img src=x:x onerror=javascript:alert(203)></a>">
|
||||
<!--[if]><script>javascript:alert(204)</script -->
|
||||
<!--[if<img src=x onerror=javascript:alert(205)//]> -->
|
||||
<script src="/\%(jscript)s"></script>
|
||||
<script src="\\%(jscript)s"></script>
|
||||
<IMG """><SCRIPT>alert("206")</SCRIPT>">
|
||||
<IMG SRC=javascript:alert(String.fromCharCode(50,48,55))>
|
||||
<IMG SRC=# onmouseover="alert('208')">
|
||||
<IMG SRC= onmouseover="alert('209')">
|
||||
<IMG onmouseover="alert('210')">
|
||||
<IMG SRC=javascript:alert('211')>
|
||||
<IMG SRC=javascript:alert('212')>
|
||||
<IMG SRC=javascript:alert('213')>
|
||||
<IMG SRC="jav ascript:alert('214');">
|
||||
<IMG SRC="jav	ascript:alert('215');">
|
||||
<IMG SRC="jav
ascript:alert('216');">
|
||||
<IMG SRC="jav
ascript:alert('217');">
|
||||
perl -e 'print "<IMG SRC=java\0script:alert(\"218\")>";' > out
|
||||
<IMG SRC="  javascript:alert('219');">
|
||||
<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>
|
||||
<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("220")>
|
||||
<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>
|
||||
<<SCRIPT>alert("221");//<</SCRIPT>
|
||||
<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >
|
||||
<SCRIPT SRC=//ha.ckers.org/.j>
|
||||
<IMG SRC="javascript:alert('222')"
|
||||
<iframe src=http://ha.ckers.org/scriptlet.html <
|
||||
\";alert('223');//
|
||||
<u oncopy=alert()> Copy me</u>
|
||||
<i onwheel=alert(224)> Scroll over me </i>
|
||||
<plaintext>
|
||||
http://a/%%30%30
|
||||
</textarea><script>alert(225)</script>
|
||||
|
||||
# SQL Injection
|
||||
#
|
||||
# Strings which can cause a SQL injection if inputs are not sanitized
|
||||
|
||||
1;DROP TABLE users
|
||||
1'; DROP TABLE users-- 1
|
||||
' OR 1=1 -- 1
|
||||
' OR '1'='1
|
||||
'; EXEC sp_MSForEachTable 'DROP TABLE ?'; --
|
||||
|
||||
%
|
||||
_
|
||||
|
||||
# Server Code Injection
|
||||
#
|
||||
# Strings which can cause user to run code on server as a privileged user (c.f. https://news.ycombinator.com/item?id=7665153)
|
||||
|
||||
-
|
||||
--
|
||||
--version
|
||||
--help
|
||||
$USER
|
||||
/dev/null; touch /tmp/blns.fail ; echo
|
||||
`touch /tmp/blns.fail`
|
||||
$(touch /tmp/blns.fail)
|
||||
@{[system "touch /tmp/blns.fail"]}
|
||||
|
||||
# Command Injection (Ruby)
|
||||
#
|
||||
# Strings which can call system commands within Ruby/Rails applications
|
||||
|
||||
eval("puts 'hello world'")
|
||||
System("ls -al /")
|
||||
`ls -al /`
|
||||
Kernel.exec("ls -al /")
|
||||
Kernel.exit(1)
|
||||
%x('ls -al /')
|
||||
|
||||
# XXE Injection (XML)
|
||||
#
|
||||
# String which can reveal system files when parsed by a badly configured XML parser
|
||||
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE foo [ <!ELEMENT foo ANY ><!ENTITY xxe SYSTEM "file:///etc/passwd" >]><foo>&xxe;</foo>
|
||||
|
||||
# Unwanted Interpolation
|
||||
#
|
||||
# Strings which can be accidentally expanded into different strings if evaluated in the wrong context, e.g. used as a printf format string or via Perl or shell eval. Might expose sensitive data from the program doing the interpolation, or might just represent the wrong string.
|
||||
|
||||
$HOME
|
||||
$ENV{'HOME'}
|
||||
%d
|
||||
%s%s%s%s%s
|
||||
{0}
|
||||
%*.*s
|
||||
%@
|
||||
%n
|
||||
File:///
|
||||
|
||||
# File Inclusion
|
||||
#
|
||||
# Strings which can cause user to pull in files that should not be a part of a web server
|
||||
|
||||
../../../../../../../../../../../etc/passwd%00
|
||||
../../../../../../../../../../../etc/hosts
|
||||
|
||||
# Known CVEs and Vulnerabilities
|
||||
#
|
||||
# Strings that test for known vulnerabilities
|
||||
|
||||
() { 0; }; touch /tmp/blns.shellshock1.fail;
|
||||
() { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; }
|
||||
<<< %s(un='%s') = %u
|
||||
+++ATH0
|
||||
|
||||
# MSDOS/Windows Special Filenames
|
||||
#
|
||||
# Strings which are reserved characters in MSDOS/Windows
|
||||
|
||||
CON
|
||||
PRN
|
||||
AUX
|
||||
CLOCK$
|
||||
NUL
|
||||
A:
|
||||
ZZ:
|
||||
COM1
|
||||
LPT1
|
||||
LPT2
|
||||
LPT3
|
||||
COM2
|
||||
COM3
|
||||
COM4
|
||||
|
||||
# IRC specific strings
|
||||
#
|
||||
# Strings that may occur on IRC clients that make security products freak out
|
||||
|
||||
DCC SEND STARTKEYLOGGER 0 0 0
|
||||
|
||||
# Scunthorpe Problem
|
||||
#
|
||||
# Innocuous strings which may be blocked by profanity filters (https://en.wikipedia.org/wiki/Scunthorpe_problem)
|
||||
|
||||
Scunthorpe General Hospital
|
||||
Penistone Community Church
|
||||
Lightwater Country Park
|
||||
Jimmy Clitheroe
|
||||
Horniman Museum
|
||||
shitake mushrooms
|
||||
RomansInSussex.co.uk
|
||||
http://www.cum.qc.ca/
|
||||
Craig Cockburn, Software Specialist
|
||||
Linda Callahan
|
||||
Dr. Herman I. Libshitz
|
||||
magna cum laude
|
||||
Super Bowl XXX
|
||||
medieval erection of parapets
|
||||
evaluate
|
||||
mocha
|
||||
expression
|
||||
Arsenal canal
|
||||
classic
|
||||
Tyson Gay
|
||||
Dick Van Dyke
|
||||
basement
|
||||
|
||||
# Human injection
|
||||
#
|
||||
# Strings which may cause human to reinterpret worldview
|
||||
|
||||
If you're reading this, you've been in a coma for almost 20 years now. We're trying a new technique. We don't know where this message will end up in your dream, but we hope it works. Please wake up, we miss you.
|
||||
|
||||
# Terminal escape codes
|
||||
#
|
||||
# Strings which punish the fools who use cat/type on this file
|
||||
|
||||
Roses are [0;31mred[0m, violets are [0;34mblue. Hope you enjoy terminal hue
|
||||
But now...[20Cfor my greatest trick...[8m
|
||||
The quick brown fox... [Beeeep]
|
||||
|
||||
# iOS Vulnerabilities
|
||||
#
|
||||
# Strings which crashed iMessage in various versions of iOS
|
||||
|
||||
Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗
|
||||
🏳0🌈️
|
||||
జ్ఞా
|
||||
|
||||
# Persian special characters
|
||||
#
|
||||
# This is a four-character string which includes Persian special characters (گچپژ)
|
||||
|
||||
گچپژ
|
||||
|
||||
# jinja2 injection
|
||||
#
|
||||
# first one is supposed to raise "MemoryError" exception
|
||||
# second, obviously, prints contents of /etc/passwd
|
||||
|
||||
{% print 'x' * 64 * 1024**3 %}
|
||||
{{ "".__class__.__mro__[2].__subclasses__()[40]("/etc/passwd").read() }}
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,76 +0,0 @@
|
|||
mod example_document;
|
||||
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use example_document::ExampleDocument;
|
||||
use reconcile::{reconcile, reconcile_with_cursors};
|
||||
use serde::Deserialize;
|
||||
|
||||
/// For every example document, reconciles the left and right texts (in that
/// order) against the parent and lets the document check the plain-text
/// result via `assert_eq_without_cursors`.
#[test]
fn test_document_one_way_without_cursors() {
    let examples = get_all_documents();

    for example in examples.iter() {
        // Bind the inputs first so they outlive the reconcile call.
        let parent = example.parent();
        let left = example.left();
        let right = example.right();

        let merged = reconcile(&parent, &left.text, &right.text);
        example.assert_eq_without_cursors(&merged);
    }
}
|
||||
|
||||
/// For every example document, reconciles the left and right sides (in that
/// order) against the parent using `reconcile_with_cursors` and lets the
/// document check the result via `assert_eq`.
#[test]
fn test_document_one_way_with_cursors() {
    let examples = get_all_documents();

    for example in examples.iter() {
        // Bind the inputs first so they outlive the reconcile call.
        let parent = example.parent();
        let left = example.left();
        let right = example.right();

        let merged = reconcile_with_cursors(&parent, left, right);
        example.assert_eq(&merged);
    }
}
|
||||
|
||||
/// Same check as the one-way plain-text test, but with the two sides swapped:
/// the right text is passed first and the left text second. The document's
/// `assert_eq_without_cursors` check is applied to the result unchanged.
#[test]
fn test_document_inverse_way_without_cursors() {
    let examples = get_all_documents();

    for example in examples.iter() {
        // Bind the inputs first so they outlive the reconcile call.
        let parent = example.parent();
        let right = example.right();
        let left = example.left();

        let merged = reconcile(&parent, &right.text, &left.text);
        example.assert_eq_without_cursors(&merged);
    }
}
|
||||
|
||||
/// Same check as the one-way cursor test, but with the two sides swapped:
/// the right side is passed first and the left side second. The document's
/// `assert_eq` check is applied to the result unchanged.
#[test]
fn test_document_inverse_way_with_cursors() {
    let examples = get_all_documents();

    for example in examples.iter() {
        // Bind the inputs first so they outlive the reconcile call.
        let parent = example.parent();
        let right = example.right();
        let left = example.left();

        let merged = reconcile_with_cursors(&parent, right, left);
        example.assert_eq(&merged);
    }
}
|
||||
|
||||
fn get_all_documents() -> Vec<ExampleDocument> {
|
||||
let examples_dir = Path::new("tests/examples");
|
||||
let entries = fs::read_dir(examples_dir)
|
||||
.expect("Failed to read examples directory")
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry.expect("Failed to read directory entry");
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_file() && path.extension().and_then(|ext| ext.to_str()) == Some("yml") {
|
||||
let file = fs::File::open(&path).expect("Failed to open example file");
|
||||
for document in serde_yaml::Deserializer::from_reader(file) {
|
||||
let doc =
|
||||
ExampleDocument::deserialize(document).expect("Failed to deserialize document");
|
||||
documents.push(doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
documents
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue