Fix syncing when network latency is present (#4)
* WIP * Add debug * Dedupe inserts * Add deterministic ordering * Fix whitespaces * Update insta * Add integration test script * Rename * Add test * Working for non-deletes * omg it mostly works for deletes * Isdeleted fix * remove created dates * update api * Take document id * No max attempt * works * Use string uuids * . * working!!!! (hopefully) * Improve bundling * Add module * lint * . * lint * Fix CI * use toolchain * clean up * Add useSlowFileEvents * Delete fuzz * Fix CI * use docker * fix script * clean up * Clean up * change node version * Build docker image on every commit * fix ci * 1 db per vault * Add scripts folder * Bump versions * Lint * . * Fix tests for real * Style * . * try * Consistent ordering * Fix tests * hmm * . * Clean up diff * Fixes * . * Fix version bump * . * . * .
This commit is contained in:
parent
bcf48c428d
commit
8b8f1d91d9
91 changed files with 2252 additions and 1586 deletions
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[]
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" what? \")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "what?",
|
||||
original: " what?",
|
||||
},
|
||||
Token {
|
||||
normalised: "",
|
||||
original: " ",
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\" hello, \\nwhere are you?\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "hello,",
|
||||
original: " hello,",
|
||||
},
|
||||
Token {
|
||||
normalised: "where",
|
||||
original: " \nwhere",
|
||||
},
|
||||
Token {
|
||||
normalised: "are",
|
||||
original: " are",
|
||||
},
|
||||
Token {
|
||||
normalised: "you?",
|
||||
original: " you?",
|
||||
},
|
||||
]
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
source: reconcile/src/tokenizer/word_tokenizer.rs
|
||||
expression: "word_tokenizer(\"Hi there!\")"
|
||||
snapshot_kind: text
|
||||
---
|
||||
[
|
||||
Token {
|
||||
normalised: "Hi",
|
||||
original: "Hi",
|
||||
},
|
||||
Token {
|
||||
normalised: "there!",
|
||||
original: " there!",
|
||||
},
|
||||
]
|
||||
|
|
@ -8,24 +8,19 @@ use serde::{Deserialize, Serialize};
|
|||
#[derive(Debug, Clone)]
|
||||
pub struct Token<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
normalised: T,
|
||||
original: String,
|
||||
}
|
||||
|
||||
impl From<&str> for Token<String> {
|
||||
fn from(s: &str) -> Self {
|
||||
Token {
|
||||
normalised: s.to_owned(),
|
||||
original: s.to_owned(),
|
||||
}
|
||||
}
|
||||
fn from(s: &str) -> Self { Token::new(s.trim().to_owned(), s.to_owned()) }
|
||||
}
|
||||
|
||||
impl<T> Token<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
pub fn new(normalised: T, original: String) -> Self {
|
||||
Token {
|
||||
|
|
@ -43,7 +38,7 @@ where
|
|||
|
||||
impl<T> PartialEq for Token<T>
|
||||
where
|
||||
T: PartialEq + Clone,
|
||||
T: PartialEq + Clone + std::fmt::Debug,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool { self.normalised == other.normalised }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,48 @@
|
|||
use super::token::Token;
|
||||
|
||||
/// Splits text into word tokens, keeping each word's leading whitespace attached to it (trailing whitespace becomes its own token).
|
||||
///
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// "Hi there!" -> ["Hi", " there!"]
|
||||
pub fn word_tokenizer(text: &str) -> Vec<Token<String>> {
|
||||
text.split_inclusive(char::is_whitespace)
|
||||
.map(|s| Token::new(s.to_owned(), s.to_owned()))
|
||||
.collect()
|
||||
let mut result: Vec<Token<String>> = Vec::new();
|
||||
|
||||
let mut last_whitespace = 0;
|
||||
let mut previous_char_is_whitespace = true;
|
||||
|
||||
for (i, c) in text.char_indices() {
|
||||
let is_current_char_whitespace = c.is_whitespace();
|
||||
if !previous_char_is_whitespace && is_current_char_whitespace {
|
||||
result.push(text[last_whitespace..i].into());
|
||||
last_whitespace = i;
|
||||
}
|
||||
|
||||
previous_char_is_whitespace = is_current_char_whitespace;
|
||||
}
|
||||
|
||||
if last_whitespace < text.len() {
|
||||
result.push(text[last_whitespace..].into());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_with_snapshots() {
|
||||
assert_debug_snapshot!(word_tokenizer("Hi there!"));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(""));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" what? "));
|
||||
|
||||
assert_debug_snapshot!(word_tokenizer(" hello, \nwhere are you?"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue