Add tokenizer
This commit is contained in:
parent
e910d9c5f4
commit
331e264399
7 changed files with 119 additions and 33 deletions
|
|
@ -18,6 +18,7 @@
|
|||
//! without making reasonable progress.
|
||||
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, IndexMut, Range};
|
||||
use std::vec;
|
||||
|
||||
|
|
@ -33,11 +34,14 @@ use super::raw_operation::RawOperation;
|
|||
///
|
||||
/// This diff is done with an optional deadline that defines the maximal
|
||||
/// execution time permitted before it bails and falls back to an approximation.
|
||||
pub fn diff(old: &[Token], new: &[Token]) -> Vec<RawOperation> {
|
||||
pub fn diff<T>(old: &[Token<T>], new: &[Token<T>]) -> Vec<RawOperation<T>>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
let max_d = max_d(old.len(), new.len());
|
||||
let mut vb = V::new(max_d);
|
||||
let mut vf = V::new(max_d);
|
||||
let mut result: Vec<RawOperation> = vec![];
|
||||
let mut result: Vec<RawOperation<T>> = vec![];
|
||||
conquer(
|
||||
old,
|
||||
0..old.len(),
|
||||
|
|
@ -118,14 +122,17 @@ fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
|
|||
/// simultaneously run the basic algorithm in both the forward and reverse
|
||||
/// directions until furthest reaching forward and reverse paths starting at
|
||||
/// opposing corners 'overlap'.
|
||||
fn find_middle_snake(
|
||||
old: &[Token],
|
||||
fn find_middle_snake<T>(
|
||||
old: &[Token<T>],
|
||||
old_range: Range<usize>,
|
||||
new: &[Token],
|
||||
new: &[Token<T>],
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
) -> Option<(usize, usize)> {
|
||||
) -> Option<(usize, usize)>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
||||
|
|
@ -222,15 +229,17 @@ fn find_middle_snake(
|
|||
None
|
||||
}
|
||||
|
||||
fn conquer(
|
||||
old: &[Token],
|
||||
fn conquer<T>(
|
||||
old: &[Token<T>],
|
||||
mut old_range: Range<usize>,
|
||||
new: &[Token],
|
||||
new: &[Token<T>],
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
result: &mut Vec<RawOperation>,
|
||||
) {
|
||||
result: &mut Vec<RawOperation<T>>,
|
||||
) where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
|
|
|
|||
|
|
@ -1,14 +1,21 @@
|
|||
use crate::tokenizer::token::Token;
|
||||
use std::hash::Hash;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum RawOperation {
|
||||
Insert(Vec<Token>),
|
||||
Delete(Vec<Token>),
|
||||
Equal(Vec<Token>),
|
||||
pub enum RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
Insert(Vec<Token<T>>),
|
||||
Delete(Vec<Token<T>>),
|
||||
Equal(Vec<Token<T>>),
|
||||
}
|
||||
|
||||
impl RawOperation {
|
||||
pub fn tokens(&self) -> &Vec<Token> {
|
||||
impl<T> RawOperation<T>
|
||||
where
|
||||
T: PartialEq + Hash + Clone,
|
||||
{
|
||||
pub fn tokens(&self) -> &Vec<Token<T>> {
|
||||
match self {
|
||||
RawOperation::Insert(tokens) => tokens,
|
||||
RawOperation::Delete(tokens) => tokens,
|
||||
|
|
@ -17,13 +24,28 @@ impl RawOperation {
|
|||
}
|
||||
|
||||
pub fn original_text_length(&self) -> usize {
|
||||
self.tokens()
|
||||
.iter()
|
||||
.map(|t| t.original.chars().count())
|
||||
.sum()
|
||||
self.tokens().iter().map(|t| t.get_original_length()).sum()
|
||||
}
|
||||
|
||||
pub fn get_original_text(self) -> String {
|
||||
self.tokens().iter().map(|t| t.original.clone()).collect()
|
||||
self.tokens().iter().map(|t| t.original()).collect()
|
||||
}
|
||||
|
||||
/// Extends the operation with another operation if returning the new operation.
|
||||
/// Only operations of the same type can be used to extend. If the operations are of different
|
||||
/// types, returns None.
|
||||
pub fn extend(self, other: RawOperation<T>) -> Option<RawOperation<T>> {
|
||||
match (self, other) {
|
||||
(RawOperation::Insert(tokens1), RawOperation::Insert(tokens2)) => Some(
|
||||
RawOperation::Insert(tokens1.into_iter().chain(tokens2.into_iter()).collect()),
|
||||
),
|
||||
(RawOperation::Delete(tokens1), RawOperation::Delete(tokens2)) => Some(
|
||||
RawOperation::Delete(tokens1.into_iter().chain(tokens2.into_iter()).collect()),
|
||||
),
|
||||
(RawOperation::Equal(tokens1), RawOperation::Equal(tokens2)) => Some(
|
||||
RawOperation::Equal(tokens1.into_iter().chain(tokens2.into_iter()).collect()),
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue