Add --exclude-path to enroll diff command
Some checks failed
CI / test (push) Failing after 1m45s
Lint / test (push) Successful in 31s
Trivy / test (push) Successful in 23s

So that you can ignore certain churn from the diff

(stuff you still wanted to harvest as a baseline but don't care if it changes day to day)
This commit is contained in:
Miguel Jacq 2026-01-10 08:56:35 +11:00
parent 8be821c494
commit ca3d958a96
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
4 changed files with 50 additions and 3 deletions

View file

@ -3,6 +3,7 @@
* Introduce `enroll validate` - a tool to validate a harvest against the state schema, or check for missing or orphaned obsolete artifacts in a harvest.
* Attempt to generate Jinja2 templates of systemd unit files and Postfix main.cf (now that JinjaTurtle supports it)
* Update pynacl dependency to resolve CVE-2025-69277
* Add `--exclude-path` to the `enroll diff` command, so that you can ignore certain churn in the diff (files you still want to harvest as a baseline, but don't care about if they change from day to day)
# 0.3.0

View file

@ -131,6 +131,7 @@ Compare two harvest bundles and report what changed.
**Inputs**
- `--old <harvest>` and `--new <harvest>` (directories or `state.json` paths)
- `--sops` when comparing SOPS-encrypted harvest bundles
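- `--exclude-path <pattern>` (repeatable) to ignore matching file paths in the diff; supports globs (including `**`) and regex via `re:<regex>`. This affects file drift reporting only (added/removed/changed files), not package/service/user diffs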
**Output formats**
- `--format json` (default for webhooks)
@ -164,8 +165,7 @@ Validates a harvest by checking:
* state.json exists and is valid JSON
* state.json validates against a JSON Schema (by default the vendored one)
* Every `managed_file` entry has a corresponding artifact at: `artifacts/<role_name>/<src_rel>`
It also warns if there are **unreferenced files** sitting in `artifacts/`.
* There are no **unreferenced files** sitting in `artifacts/` (i.e. files that are not referenced by the state).
#### Schema location + overrides
@ -400,7 +400,7 @@ enroll single-shot --remote-host myhost.example.com --remote-user myuser --har
## Diff
### Compare two harvest directories
### Compare two harvest directories, output in JSON
```bash
enroll diff --old /path/to/harvestA --new /path/to/harvestB --format json
```
@ -412,6 +412,11 @@ enroll diff --old /path/to/golden/harvest --new /path/to/new/harvest --web
`diff` mode also supports email sending and text or markdown format, as well as `--exit-code` mode to trigger a return code of 2 (useful for crons or CI)
### Exclude a specific directory or file from the diff
```bash
enroll diff --old /path/to/harvestA --new /path/to/harvestB --exclude-path /var/anacron
```
---
## Explain

View file

@ -550,6 +550,16 @@ def main() -> None:
default="text",
help="Report output format (default: text).",
)
d.add_argument(
"--exclude-path",
action="append",
default=[],
metavar="PATTERN",
help=(
"Exclude file paths from the diff report (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
"This affects file drift reporting only (added/removed/changed files), not package/service/user diffs."
),
)
d.add_argument(
"--out",
help="Write the report to this file instead of stdout.",
@ -827,6 +837,7 @@ def main() -> None:
args.old,
args.new,
sops_mode=bool(getattr(args, "sops", False)),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
txt = format_report(report, fmt=str(getattr(args, "format", "text")))

View file

@ -16,6 +16,7 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from .remote import _safe_extract_tar
from .pathfilter import PathFilter
from .sopsutil import decrypt_file_binary_to, require_sops_cmd
@ -289,6 +290,7 @@ def compare_harvests(
new_path: str,
*,
sops_mode: bool = False,
exclude_paths: Optional[List[str]] = None,
) -> Tuple[Dict[str, Any], bool]:
"""Compare two harvests.
@ -387,6 +389,17 @@ def compare_harvests(
old_files = _file_index(old_b.dir, old_state)
new_files = _file_index(new_b.dir, new_state)
# Optional user-supplied path exclusions (same semantics as harvest --exclude-path),
# applied only to file drift reporting.
diff_filter = PathFilter(include=(), exclude=exclude_paths or ())
if exclude_paths:
old_files = {
p: r for p, r in old_files.items() if not diff_filter.is_excluded(p)
}
new_files = {
p: r for p, r in new_files.items() if not diff_filter.is_excluded(p)
}
old_paths_set = set(old_files)
new_paths_set = set(new_files)
@ -462,6 +475,9 @@ def compare_harvests(
report: Dict[str, Any] = {
"generated_at": _utc_now_iso(),
"filters": {
"exclude_paths": list(exclude_paths or []),
},
"old": {
"input": old_path,
"bundle_dir": str(old_b.dir),
@ -532,6 +548,11 @@ def _report_text(report: Dict[str, Any]) -> str:
f"new: {new.get('input')} (host={new.get('host')}, state_mtime={new.get('state_mtime')})"
)
filt = report.get("filters", {}) or {}
ex_paths = filt.get("exclude_paths", []) or []
if ex_paths:
lines.append(f"file exclude patterns: {', '.join(str(p) for p in ex_paths)}")
pk = report.get("packages", {})
lines.append("\nPackages")
lines.append(f" added: {len(pk.get('added', []) or [])}")
@ -638,6 +659,15 @@ def _report_markdown(report: Dict[str, Any]) -> str:
f"- **New**: `{new.get('input')}` (host={new.get('host')}, state_mtime={new.get('state_mtime')})\n"
)
filt = report.get("filters", {}) or {}
ex_paths = filt.get("exclude_paths", []) or []
if ex_paths:
out.append(
"- **File exclude patterns**: "
+ ", ".join(f"`{p}`" for p in ex_paths)
+ "\n"
)
pk = report.get("packages", {})
out.append("## Packages\n")
out.append(f"- Added: {len(pk.get('added', []) or [])}\n")