Lint
This commit is contained in:
parent
94f9c0d594
commit
5c3b87f2d5
69 changed files with 1334 additions and 213 deletions
|
|
@@ -81,11 +81,7 @@ def find_bad_files(
|
|||
bad: list[BadFile] = []
|
||||
stats: dict[str, dict] = {}
|
||||
|
||||
modes = sorted(
|
||||
d
|
||||
for d in os.listdir(base_dir)
|
||||
if (base_dir / d).is_dir()
|
||||
)
|
||||
modes = sorted(d for d in os.listdir(base_dir) if (base_dir / d).is_dir())
|
||||
|
||||
for mode in modes:
|
||||
mode_dir = base_dir / mode
|
||||
|
|
@@ -149,7 +145,9 @@ def find_duplicates(base_dir: Path) -> tuple[list[BadFile], dict[str, dict]]:
|
|||
# Keep the file with the most rows
|
||||
files.sort(key=lambda x: x[1], reverse=True)
|
||||
for filename, rows in files[1:]:
|
||||
dupes.append(BadFile(mode=mode, filename=filename, slug=slug, rows=rows))
|
||||
dupes.append(
|
||||
BadFile(mode=mode, filename=filename, slug=slug, rows=rows)
|
||||
)
|
||||
mode_dupes += 1
|
||||
|
||||
duped_slugs = sum(1 for fs in slug_files.values() if len(fs) > 1)
|
||||
|
|
@@ -197,7 +195,9 @@ def main() -> None:
|
|||
bad_files, stats = find_bad_files(args.travel_times, args.threshold_pct)
|
||||
|
||||
print("=== Per-mode summary ===\n")
|
||||
print(f"{'Mode':<10} {'Total':>6} {'Bad':>5} {'Threshold':>10} {'Median':>8} {'Range':>20}")
|
||||
print(
|
||||
f"{'Mode':<10} {'Total':>6} {'Bad':>5} {'Threshold':>10} {'Median':>8} {'Range':>20}"
|
||||
)
|
||||
print("-" * 65)
|
||||
for mode, s in sorted(stats.items()):
|
||||
rng = f"{s['min']:,}–{s['max']:,}"
|
||||
|
|
@@ -231,7 +231,9 @@ def main() -> None:
|
|||
total_removable = sum(s["removable"] for s in dupe_stats.values())
|
||||
if total_removable > 0:
|
||||
print(f"\n=== Duplicates ({total_removable} removable files) ===\n")
|
||||
print(f"{'Mode':<10} {'Total':>6} {'Unique':>7} {'Duped slugs':>12} {'Removable':>10}")
|
||||
print(
|
||||
f"{'Mode':<10} {'Total':>6} {'Unique':>7} {'Duped slugs':>12} {'Removable':>10}"
|
||||
)
|
||||
print("-" * 50)
|
||||
for mode, s in sorted(dupe_stats.items()):
|
||||
if s["removable"] > 0:
|
||||
|
|
@@ -242,9 +244,15 @@ def main() -> None:
|
|||
|
||||
if args.dedup:
|
||||
# Exclude files already deleted by --delete
|
||||
deleted_set = {(bf.mode, bf.filename) for bf in bad_files} if args.delete else set()
|
||||
to_delete = [df for df in dupe_files if (df.mode, df.filename) not in deleted_set]
|
||||
print(f"\nRemoving {len(to_delete)} duplicate files (keeping largest per slug)...")
|
||||
deleted_set = (
|
||||
{(bf.mode, bf.filename) for bf in bad_files} if args.delete else set()
|
||||
)
|
||||
to_delete = [
|
||||
df for df in dupe_files if (df.mode, df.filename) not in deleted_set
|
||||
]
|
||||
print(
|
||||
f"\nRemoving {len(to_delete)} duplicate files (keeping largest per slug)..."
|
||||
)
|
||||
deleted = _delete_files(args.travel_times, to_delete)
|
||||
print(f"Deleted {deleted}/{len(to_delete)} files.")
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue