From ca3d958a9600d47760f7c87bf22d729907e762ca Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 10 Jan 2026 08:56:35 +1100 Subject: [PATCH] Add `--exclude-path` to `enroll diff` command So that you can ignore certain churn from the diff (stuff you still wanted to harvest as a baseline but don't care if it changes day to day) --- CHANGELOG.md | 1 + README.md | 11 ++++++++--- enroll/cli.py | 11 +++++++++++ enroll/diff.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eeeddb7..5bec45b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ * Introduce `enroll validate` - a tool to validate a harvest against the state schema, or check for missing or orphaned obsolete artifacts in a harvest. * Attempt to generate Jinja2 templates of systemd unit files and Postfix main.cf (now that JinjaTurtle supports it) * Update pynacl dependency to resolve CVE-2025-69277 + * Add `--exclude-path` to `enroll diff` command, so that you can ignore certain churn from the diff (stuff you still wanted to harvest as a baseline but don't care if it changes day to day) # 0.3.0 diff --git a/README.md b/README.md index 7938859..1bafd55 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ Compare two harvest bundles and report what changed. **Inputs** - `--old ` and `--new ` (directories or `state.json` paths) - `--sops` when comparing SOPS-encrypted harvest bundles +- `--exclude-path` if you want to ignore certain files that changed in the diff **Output formats** - `--format json` (default for webhooks) @@ -164,8 +165,7 @@ Validates a harvest by checking: * state.json exists and is valid JSON * state.json validates against a JSON Schema (by default the vendored one) * Every `managed_file` entry has a corresponding artifact at: `artifacts//` - -It also warns if there are **unreferenced files** sitting in `artifacts/`. + * That there are no **unreferenced files** sitting in `artifacts/` that aren't in the state. #### Schema location + overrides @@ -400,7 +400,7 @@ enroll single-shot --remote-host myhost.example.com --remote-user myuser --har ## Diff -### Compare two harvest directories +### Compare two harvest directories, output in json ```bash enroll diff --old /path/to/harvestA --new /path/to/harvestB --format json ``` @@ -412,6 +412,11 @@ enroll diff --old /path/to/golden/harvest --new /path/to/new/harvest --web `diff` mode also supports email sending and text or markdown format, as well as `--exit-code` mode to trigger a return code of 2 (useful for crons or CI) +### Ignore a specific directory or file from the diff +```bash +enroll diff --old /path/to/harvestA --new /path/to/harvestB --exclude-path /var/anacron +``` + --- ## Explain diff --git a/enroll/cli.py b/enroll/cli.py index 9f9e63f..32f8030 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -550,6 +550,16 @@ def main() -> None: default="text", help="Report output format (default: text).", ) + d.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from the diff report (repeatable). Supports globs (including '**') and regex via 're:'. " + "This affects file drift reporting only (added/removed/changed files), not package/service/user diffs." + ), + ) d.add_argument( "--out", help="Write the report to this file instead of stdout.", @@ -827,6 +837,7 @@ def main() -> None: args.old, args.new, sops_mode=bool(getattr(args, "sops", False)), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) txt = format_report(report, fmt=str(getattr(args, "format", "text"))) diff --git a/enroll/diff.py b/enroll/diff.py index 5ad0eac..0b3fd69 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -16,6 +16,7 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Tuple from .remote import _safe_extract_tar +from .pathfilter import PathFilter from .sopsutil import decrypt_file_binary_to, require_sops_cmd @@ -289,6 +290,7 @@ def compare_harvests( new_path: str, *, sops_mode: bool = False, + exclude_paths: Optional[List[str]] = None, ) -> Tuple[Dict[str, Any], bool]: """Compare two harvests. @@ -387,6 +389,17 @@ def compare_harvests( old_files = _file_index(old_b.dir, old_state) new_files = _file_index(new_b.dir, new_state) + + # Optional user-supplied path exclusions (same semantics as harvest --exclude-path), + # applied only to file drift reporting. + diff_filter = PathFilter(include=(), exclude=exclude_paths or ()) + if exclude_paths: + old_files = { + p: r for p, r in old_files.items() if not diff_filter.is_excluded(p) + } + new_files = { + p: r for p, r in new_files.items() if not diff_filter.is_excluded(p) + } old_paths_set = set(old_files) new_paths_set = set(new_files) @@ -462,6 +475,9 @@ def compare_harvests( report: Dict[str, Any] = { "generated_at": _utc_now_iso(), + "filters": { + "exclude_paths": list(exclude_paths or []), + }, "old": { "input": old_path, "bundle_dir": str(old_b.dir), @@ -532,6 +548,11 @@ def _report_text(report: Dict[str, Any]) -> str: f"new: {new.get('input')} (host={new.get('host')}, state_mtime={new.get('state_mtime')})" ) + filt = report.get("filters", {}) or {} + ex_paths = filt.get("exclude_paths", []) or [] + if ex_paths: + lines.append(f"file exclude patterns: {', '.join(str(p) for p in ex_paths)}") + pk = report.get("packages", {}) lines.append("\nPackages") lines.append(f" added: {len(pk.get('added', []) or [])}") @@ -638,6 +659,15 @@ def _report_markdown(report: Dict[str, Any]) -> str: f"- **New**: `{new.get('input')}` (host={new.get('host')}, state_mtime={new.get('state_mtime')})\n" ) + filt = report.get("filters", {}) or {} + ex_paths = filt.get("exclude_paths", []) or [] + if ex_paths: + out.append( + "- **File exclude patterns**: " + + ", ".join(f"`{p}`" for p in ex_paths) + + "\n" + ) + pk = report.get("packages", {}) out.append("## Packages\n") out.append(f"- Added: {len(pk.get('added', []) or [])}\n")