This commit is contained in:
Andras Schmelczer 2026-04-24 20:56:03 +01:00
parent a7b588da97
commit 19d5dc1999
11 changed files with 358 additions and 355 deletions

View file

@ -1,85 +0,0 @@
import { describe, it } from "node:test";
import assert from "node:assert";
import { buildConflictFileName, isConflictPath } from "./conflict-path";
describe("buildConflictFileName", () => {
  // Strips the `conflict-<36-char-uuid>-` prefix and the ".md" extension so
  // assertions can inspect only the (possibly truncated) original stem.
  const stemOf = (conflictName: string): string =>
    conflictName.slice(
      "conflict-".length + 36 + 1,
      conflictName.length - ".md".length
    );

  it("truncates to the filesystem byte limit while preserving the extension", () => {
    const result = buildConflictFileName(`${"a".repeat(300)}.md`);
    assert.ok(Buffer.byteLength(result, "utf8") <= 255);
    assert.ok(result.endsWith(".md"));
  });

  it("truncates on a codepoint boundary for multi-byte UTF-8 names", () => {
    // "🎉" is 4 bytes in UTF-8; splitting one would yield U+FFFD.
    const result = buildConflictFileName(`${"🎉".repeat(100)}.md`);
    assert.ok(Buffer.byteLength(result, "utf8") <= 255);
    // Written as an escape so the replacement character cannot be lost or
    // re-encoded by editors / transports.
    assert.ok(!result.includes("\uFFFD"));
  });

  it("does not split a ZWJ emoji sequence", () => {
    // 👨‍👩‍👧 is one grapheme but 5 code points joined by U+200D.
    // A codepoint-only truncation can leave a dangling ZWJ.
    // The joiners are spelled as escapes: they are invisible and easily
    // stripped, which would silently turn this into three separate emoji.
    const family = "\u{1F468}\u200D\u{1F469}\u200D\u{1F467}";
    const result = buildConflictFileName(`${family.repeat(20)}.md`);
    assert.ok(Buffer.byteLength(result, "utf8") <= 255);
    const stem = stemOf(result);
    assert.strictEqual(
      stem.length % family.length,
      0,
      "stem length must be a whole number of families"
    );
    assert.ok(
      !stem.endsWith("\u200D"),
      "stem must not end with a dangling ZWJ"
    );
  });

  it("does not split a base character from its combining mark", () => {
    // NFD "é" = "e" (U+0065) + combining acute (U+0301): one grapheme,
    // two code points. A codepoint-only loop can strand the accent.
    // Escapes pin the decomposed form; a precomposed U+00E9 would make the
    // modulo check below trivially true.
    const grapheme = "e\u0301";
    const result = buildConflictFileName(`${grapheme.repeat(150)}.md`);
    assert.ok(Buffer.byteLength(result, "utf8") <= 255);
    const stem = stemOf(result);
    assert.strictEqual(
      stem.length % grapheme.length,
      0,
      "stem length must be a whole number of graphemes"
    );
    assert.ok(
      !stem.endsWith("\u0301") || stem.endsWith(grapheme),
      "combining mark must stay attached to its base character"
    );
  });
});
describe("isConflictPath", () => {
  it("does not misclassify user-authored names that start with `conflict-`", () => {
    // `conflict-` followed by something that is not a UUID is an ordinary file.
    const verdict = isConflictPath("conflict-resolution.md");
    assert.strictEqual(verdict, false);
  });

  it("only inspects the final path segment", () => {
    // [path, expected classification]: a conflict-looking *directory* must not
    // count, a conflict-looking *file name* below plain directories must.
    const expectations = [
      ["conflict-12345678-1234-1234-1234-123456789abc-x/note.md", false],
      ["a/b/conflict-12345678-1234-1234-1234-123456789abc-note.md", true],
    ] as const;
    for (const [candidate, expected] of expectations) {
      assert.strictEqual(isConflictPath(candidate), expected);
    }
  });

  it("round-trips with buildConflictFileName", () => {
    // Whatever the builder emits must be recognised by the classifier.
    const generated = buildConflictFileName("note.md");
    assert.strictEqual(isConflictPath(generated), true);
  });
});

View file

@ -1,66 +0,0 @@
import type { RelativePath } from "../sync-operations/types";
// Local-only files displaced by `FileOperations.ensureClearPath` (defined
// outside this module) are named `conflict-<uuid>-<originalName>`; see
// `buildConflictFileName`, which takes the UUID from `crypto.randomUUID()`.
//
// The pattern is anchored at the start of the name only and requires the
// literal `conflict-`, then lowercase hex groups of 8-4-4-4-12 digits, then a
// trailing `-`. Demanding the full UUID shape means a user-authored filename
// that merely starts with `conflict-` doesn't get misclassified. The version
// and variant digits are not checked, so any lowercase UUID-shaped value
// matches.
const CONFLICT_UUID_REGEX =
/^conflict-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}-/u;
// Byte budget for a single path segment. 255 is the per-name cap this module
// targets for common filesystems (ext4 / NTFS / APFS).
const MAX_SEGMENT_BYTES = 255;
// `conflict-` + 36-character UUID + `-` = 46. Every character is ASCII, so the
// character count equals the UTF-8 byte count.
const CONFLICT_PREFIX_LEN = "conflict-".length + 36 + "-".length;
// What remains for the original name once the prefix is accounted for, minus
// 4 spare bytes reserved for a future prefix bump.
const MAX_ORIGINAL_BYTES = MAX_SEGMENT_BYTES - (CONFLICT_PREFIX_LEN + 4);
/**
 * Produce the `conflict-<uuid>-<name>` file name used for a displaced file.
 *
 * The original name is first shortened (extension kept where possible) so
 * that the complete result stays within the filesystem's segment-length cap
 * and the file remains recognizable / openable.
 *
 * @param fileName - Original file name (a single path segment).
 * @returns A fresh conflict name; each call embeds a new random UUID.
 */
export function buildConflictFileName(fileName: string): string {
  const shortenedName = truncateFileNameToByteLimit(
    fileName,
    MAX_ORIGINAL_BYTES
  );
  const uuid = crypto.randomUUID();
  return "conflict-" + uuid + "-" + shortenedName;
}
/**
 * Shorten `fileName` so that its UTF-8 encoding is at most `maxBytes` bytes.
 *
 * Names that already fit are returned unchanged. Otherwise the stem is cut on
 * grapheme-cluster boundaries and the trailing extension (everything from the
 * last "." onward) is kept intact. If that extension could not fit in
 * `maxBytes` on its own, it is not treated as an extension and the whole name
 * is truncated instead, so the result never exceeds the limit.
 *
 * @param fileName - File name to shorten (a single path segment).
 * @param maxBytes - Upper bound on the UTF-8 byte length of the result.
 * @returns The possibly shortened name; for a non-negative `maxBytes` its
 *   UTF-8 length is at most `maxBytes`.
 */
function truncateFileNameToByteLimit(
  fileName: string,
  maxBytes: number
): string {
  const encoder = new TextEncoder();
  if (encoder.encode(fileName).byteLength <= maxBytes) return fileName;

  const dotIndex = fileName.lastIndexOf(".");
  // A leading dot (dotfile) or no dot at all → no extension to preserve.
  let extension = dotIndex > 0 ? fileName.slice(dotIndex) : "";
  // An "extension" larger than the whole budget would be returned verbatim
  // and bust the limit; give up on preserving it and truncate the full name.
  if (encoder.encode(extension).byteLength > maxBytes) extension = "";

  const stem = fileName.slice(0, fileName.length - extension.length);
  const extensionBytes = encoder.encode(extension).byteLength;
  const stemBudget = Math.max(0, maxBytes - extensionBytes);

  // Walk the stem by grapheme cluster so we never split an emoji sequence
  // (e.g. ZWJ families, skin-tone modifiers) or a base+combining-mark pair.
  const segmenter = new Intl.Segmenter(undefined, { granularity: "grapheme" });
  let truncatedStem = "";
  let usedBytes = 0;
  for (const { segment } of segmenter.segment(stem)) {
    const segmentBytes = encoder.encode(segment).byteLength;
    // Stop at the first cluster that does not fit; later, smaller clusters
    // are not pulled forward, so the result is always a prefix of the stem.
    if (usedBytes + segmentBytes > stemBudget) break;
    truncatedStem += segment;
    usedBytes += segmentBytes;
  }
  return truncatedStem + extension;
}
/**
 * Report whether the last segment of `path` is a conflict-displaced file name
 * (i.e. it starts with `conflict-<uuid>-`).
 *
 * Only the text after the final "/" is examined; directory names are ignored.
 * Sync code that would otherwise create/update/delete/sync the path should
 * short-circuit when this returns true: conflict-displaced files are strictly
 * local and must stay invisible to the server.
 *
 * @param path - Relative path using "/" as separator.
 * @returns `true` when the final segment matches the conflict naming scheme.
 */
export function isConflictPath(path: RelativePath): boolean {
  const lastSlash = path.lastIndexOf("/");
  // No separator → the whole path is the file name.
  const finalSegment = lastSlash === -1 ? path : path.slice(lastSlash + 1);
  return CONFLICT_UUID_REGEX.test(finalSegment);
}