Extract reconcile (#85)

This commit is contained in:
Andras Schmelczer 2025-07-13 11:06:42 +01:00 committed by GitHub
parent 75b020146a
commit bb0e44f06f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
141 changed files with 294 additions and 36720 deletions

View file

@ -0,0 +1,88 @@
use regex::Regex;
pub fn dedup_paths(path: &str) -> impl Iterator<Item = String> {
let mut path_parts = path.split('/').collect::<Vec<_>>();
let file_name = path_parts.pop().unwrap().to_owned();
let mut directory = path_parts.join("/");
if !directory.is_empty() {
directory.push('/');
}
let name_parts = file_name.rsplitn(2, '.').collect::<Vec<_>>();
let mut reverse_parts = name_parts.into_iter().rev();
let (stem, extension) = match (reverse_parts.next(), reverse_parts.next()) {
(Some(stem), maybe_extension) => (
stem.to_owned(),
maybe_extension
.map(|ext| format!(".{ext}"))
.unwrap_or_default(),
),
_ => unreachable!("Path must have at least one part"),
};
let regex = Regex::new(r" \((\d+)\)$").unwrap();
let start_number = regex
.captures(&stem)
.and_then(|caps| caps.get(1))
.and_then(|m| m.as_str().parse::<u32>().ok())
.unwrap_or(0);
let clean_stem = regex.replace(&stem, "").to_string();
(start_number..).map(move |dedup_number| {
if dedup_number == 0 {
format!("{directory}{clean_stem}{extension}")
} else {
format!("{directory}{clean_stem} ({dedup_number}){extension}")
}
})
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_dedup_paths() {
let mut deduped = dedup_paths("file.txt");
assert_eq!(deduped.next(), Some("file.txt".to_owned()));
assert_eq!(deduped.next(), Some("file (1).txt".to_owned()));
assert_eq!(deduped.next(), Some("file (2).txt".to_owned()));
let mut deduped = dedup_paths("file");
assert_eq!(deduped.next(), Some("file".to_owned()));
assert_eq!(deduped.next(), Some("file (1)".to_owned()));
assert_eq!(deduped.next(), Some("file (2)".to_owned()));
let mut deduped = dedup_paths("file (51).md");
assert_eq!(deduped.next(), Some("file (51).md".to_owned()));
assert_eq!(deduped.next(), Some("file (52).md".to_owned()));
assert_eq!(deduped.next(), Some("file (53).md".to_owned()));
let mut deduped = dedup_paths("file (5)");
assert_eq!(deduped.next(), Some("file (5)".to_owned()));
assert_eq!(deduped.next(), Some("file (6)".to_owned()));
assert_eq!(deduped.next(), Some("file (7)".to_owned()));
let mut deduped = dedup_paths("my/path.with.dots/file (5).md");
assert_eq!(
deduped.next(),
Some("my/path.with.dots/file (5).md".to_owned())
);
assert_eq!(
deduped.next(),
Some("my/path.with.dots/file (6).md".to_owned())
);
let mut deduped = dedup_paths("my/path.with.dots/file (5)");
assert_eq!(
deduped.next(),
Some("my/path.with.dots/file (5)".to_owned())
);
assert_eq!(
deduped.next(),
Some("my/path.with.dots/file (6)".to_owned())
);
}
}

View file

@ -0,0 +1,23 @@
pub fn is_file_type_mergable(path_or_file_name: &str) -> bool {
let file_extension = path_or_file_name.split('.').next_back().unwrap_or_default();
matches!(file_extension.to_lowercase().as_str(), "md" | "txt")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_file_type_mergable() {
assert!(is_file_type_mergable(".md"));
assert!(is_file_type_mergable("hi.md"));
assert!(is_file_type_mergable("my/path/to/my/document.md"));
assert!(is_file_type_mergable("hi.MD"));
assert!(is_file_type_mergable("my/path/to/my/DOCUMENT.MD"));
assert!(!is_file_type_mergable(".json"));
assert!(!is_file_type_mergable("HELLO.JSON"));
assert!(!is_file_type_mergable("my/config.yml"));
}
}

View file

@ -0,0 +1,11 @@
use serde::{Deserialize, Deserializer};
pub fn normalize<'de, D>(deserializer: D) -> Result<String, D::Error>
where
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Ok(normalize_string(&s))
}
pub fn normalize_string(s: &str) -> String { s.trim().to_lowercase() }

View file

@ -0,0 +1,34 @@
/// Sanitize the document's path to allow all clients to create the same path in
/// their filesystem. If we didn't do this server-side, client's would need to
/// deal with mapping invalid names to valid ones and then back.
pub fn sanitize_path(path: &str) -> String {
let options = sanitize_filename::Options {
truncate: true,
windows: true, // Windows is the lowest common denominator
replacement: "",
};
path.split('/')
.map(|part| {
let proposal = sanitize_filename::sanitize_with_options(part, options.clone());
if !part.is_empty() && proposal.is_empty() {
"_".to_owned()
} else {
proposal
}
})
.collect::<Vec<_>>()
.join("/")
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_sanitize_path() {
assert_eq!(sanitize_path("/my/path/what?"), "/my/path/what");
assert_eq!(sanitize_path("file (1).md"), "file (1).md");
assert_eq!(sanitize_path("/my/path/\\\\:?"), "/my/path/_");
}
}