From 56d01486146396a7df32c4df8fb2cb4be6afc816 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 4 Jan 2026 21:27:23 +1100 Subject: [PATCH 01/33] Update README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d1c1411..50ffe01 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,8 @@ Harvest state about a host and write a harvest bundle. - glob (default): supports `*` and `**` (prefix with `glob:` to force) - regex: prefix with `re:` or `regex:` - Precedence: excludes win over includes. +* Using remote mode and sudo requires password? + - `--ask-become-pass` (or `-K`) will prompt for the password. If you forget, and remote requires password for sudo, it'll still fall back to prompting for a password, but will be a bit slower to do so. --- From 59674d4660cbc08414b71613bb5a4204e2e2bf2d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 5 Jan 2026 10:16:44 +1100 Subject: [PATCH 02/33] Introduce `enroll explain` A tool to analyze and explain what's in (or not in) a harvest and why. --- .forgejo/workflows/ci.yml | 2 +- CHANGELOG.md | 4 + README.md | 56 ++++ enroll/cli.py | 37 +++ enroll/explain.py | 578 ++++++++++++++++++++++++++++++++++++++ tests.sh | 18 +- 6 files changed, 693 insertions(+), 2 deletions(-) create mode 100644 enroll/explain.py diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml index 41efa55..68e1c02 100644 --- a/.forgejo/workflows/ci.yml +++ b/.forgejo/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: run: | apt-get update DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - ansible ansible-lint python3-venv pipx systemctl python3-apt + ansible ansible-lint python3-venv pipx systemctl python3-apt jq - name: Install Poetry run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index d134011..0f6e465 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.3.0 + + * Introduce `enroll explain` - a tool to analyze and explain what's in (or not in) a harvest and why. + # 0.2.3 * Introduce --ask-become-pass or -K to support password-required sudo on remote hosts, just like Ansible. It will also fall back to this prompt if a password is required but the arg wasn't passed in. diff --git a/README.md b/README.md index 50ffe01..565c4e8 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,62 @@ Compare two harvest bundles and report what changed. --- +### `enroll explain` +Analyze a harvest and provide user-friendly explanations for what's in it and why. + +This may also explain why something *wasn't* included (e.g a binary file, a file that was too large, unreadable due to permissions, or looked like a log file/secret. + +Provide either the path to the harvest or the path to its state.json. It can also handle SOPS-encrypted harvests. + +Output can be provided in plaintext or json. + +**Examples**: + +``` +enroll explain /path/to/state.json +enroll explain /path/to/bundle_dir +enroll explain /path/to/harvest.tar.gz +enroll explain /path/to/harvest.tar.gz.sops --sops +enroll explain /path/to/state.json --format json --max-examples 5 +``` + +**Example output**: + +``` +❯ poetry run enroll explain /tmp/syrah.harvest +Enroll explain: /tmp/syrah.harvest +Host: syrah.mig5.net (os: debian, pkg: dpkg) +Enroll: 0.2.3 + +Inventory +- Packages: 254 +- Why packages were included (observed_via): + - user_installed: 248 – Package appears explicitly installed (as opposed to only pulled in as a dependency). + - package_role: 232 – Package was referenced by an enroll packages snapshot/role. (e.g. acl, acpid, adduser) + - systemd_unit: 22 – Package is associated with a systemd unit that was harvested. (e.g. postfix.service, tor.service, apparmor.service) + +Roles collected +- users: 1 user(s), 1 file(s), 0 excluded +- services: 19 unit(s), 111 file(s), 6 excluded +- packages: 232 package snapshot(s), 41 file(s), 0 excluded +- apt_config: 26 file(s), 7 dir(s), 10 excluded +- dnf_config: 0 file(s), 0 dir(s), 0 excluded +- etc_custom: 70 file(s), 20 dir(s), 0 excluded +- usr_local_custom: 35 file(s), 1 dir(s), 0 excluded +- extra_paths: 0 file(s), 0 dir(s), 0 excluded + +Why files were included (managed_files.reason) +- custom_unowned (179): A file not owned by any package (often custom/operator-managed).. Examples: /etc/apparmor.d/local/lsb_release, /etc/apparmor.d/local/nvidia_modprobe, /etc/apparmor.d/local/sbin.dhclient +- usr_local_bin_script (35): Executable scripts under /usr/local/bin (often operator-installed).. Examples: /usr/local/bin/check_firewall, /usr/local/bin/awslogs +- apt_keyring (13): Repository signing key material used by APT.. Examples: /etc/apt/keyrings/openvpn-repo-public.asc, /etc/apt/trusted.gpg, /etc/apt/trusted.gpg.d/deb.torproject.org-keyring.gpg +- modified_conffile (10): A package-managed conffile differs from the packaged/default version.. Examples: /etc/dnsmasq.conf, /etc/ssh/moduli, /etc/tor/torrc +- logrotate_snippet (9): logrotate snippets/configs referenced in system configuration.. Examples: /etc/logrotate.d/rsyslog, /etc/logrotate.d/tor, /etc/logrotate.d/apt +- apt_config (7): APT configuration affecting package installation and repository behavior.. Examples: /etc/apt/apt.conf.d/01autoremove, /etc/apt/apt.conf.d/20listchanges, /etc/apt/apt.conf.d/70debconf +[...] +``` + +--- + ## Sensitive data By default, `enroll` does **not** assume how you handle secrets in Ansible. It will attempt to avoid harvesting likely sensitive data (private keys, passwords, tokens, etc.). This can mean it skips some config files you may ultimately want to manage. diff --git a/enroll/cli.py b/enroll/cli.py index 55fdd0b..5624587 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -11,6 +11,7 @@ from typing import Optional from .cache import new_harvest_cache_dir from .diff import compare_harvests, format_report, post_webhook, send_email +from .explain import explain_state from .harvest import harvest from .manifest import manifest from .remote import remote_harvest, RemoteSudoPasswordRequired @@ -605,6 +606,32 @@ def main() -> None: help="Environment variable containing SMTP password (optional).", ) + e = sub.add_parser("explain", help="Explain a harvest state.json") + _add_config_args(e) + e.add_argument( + "harvest", + help=( + "Harvest input (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)." + ), + ) + e.add_argument( + "--sops", + action="store_true", + help="Treat the input as a SOPS-encrypted bundle (auto-detected if the filename ends with .sops).", + ) + e.add_argument( + "--format", + choices=["text", "json"], + default="text", + help="Output format.", + ) + e.add_argument( + "--max-examples", + type=int, + default=3, + help="How many example paths/refs to show per reason.", + ) + argv = sys.argv[1:] cfg_path = _discover_config_path(argv) argv = _inject_config_argv( @@ -616,6 +643,7 @@ def main() -> None: "manifest": m, "single-shot": s, "diff": d, + "explain": e, }, ) args = ap.parse_args(argv) @@ -702,6 +730,15 @@ def main() -> None: exclude_paths=list(getattr(args, "exclude_path", []) or []), ) print(path) + elif args.cmd == "explain": + out = explain_state( + args.harvest, + sops_mode=bool(getattr(args, "sops", False)), + fmt=str(getattr(args, "format", "text")), + max_examples=int(getattr(args, "max_examples", 3)), + ) + sys.stdout.write(out) + elif args.cmd == "manifest": out_enc = manifest( args.harvest, diff --git a/enroll/explain.py b/enroll/explain.py new file mode 100644 index 0000000..835f207 --- /dev/null +++ b/enroll/explain.py @@ -0,0 +1,578 @@ +from __future__ import annotations + +import json +from collections import Counter, defaultdict +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Tuple + +from .diff import _bundle_from_input, _load_state # reuse existing bundle handling + + +@dataclass(frozen=True) +class ReasonInfo: + title: str + why: str + + +_MANAGED_FILE_REASONS: Dict[str, ReasonInfo] = { + # Package manager / repo config + "apt_config": ReasonInfo( + "APT configuration", + "APT configuration affecting package installation and repository behavior.", + ), + "apt_source": ReasonInfo( + "APT repository source", + "APT source list entries (e.g. sources.list or sources.list.d).", + ), + "apt_keyring": ReasonInfo( + "APT keyring", + "Repository signing key material used by APT.", + ), + "apt_signed_by_keyring": ReasonInfo( + "APT Signed-By keyring", + "Keyring referenced via a Signed-By directive in an APT source.", + ), + "yum_conf": ReasonInfo( + "YUM/DNF main config", + "Primary YUM configuration (often /etc/yum.conf).", + ), + "yum_config": ReasonInfo( + "YUM/DNF config", + "YUM/DNF configuration files (including conf.d).", + ), + "yum_repo": ReasonInfo( + "YUM/DNF repository", + "YUM/DNF repository definitions (e.g. yum.repos.d).", + ), + "dnf_config": ReasonInfo( + "DNF configuration", + "DNF configuration affecting package installation and repositories.", + ), + "rpm_gpg_key": ReasonInfo( + "RPM GPG key", + "Repository signing keys used by RPM/YUM/DNF.", + ), + # SSH + "authorized_keys": ReasonInfo( + "SSH authorized keys", + "User authorized_keys files (controls who can log in with SSH keys).", + ), + "ssh_public_key": ReasonInfo( + "SSH public key", + "SSH host/user public keys relevant to authentication.", + ), + # System config / security + "system_security": ReasonInfo( + "Security configuration", + "Security-sensitive configuration (SSH, sudoers, PAM, auth, etc.).", + ), + "system_network": ReasonInfo( + "Network configuration", + "Network configuration (interfaces, resolv.conf, network managers, etc.).", + ), + "system_firewall": ReasonInfo( + "Firewall configuration", + "Firewall rules/configuration (ufw, nftables, iptables, etc.).", + ), + "system_sysctl": ReasonInfo( + "sysctl configuration", + "Kernel sysctl tuning (sysctl.conf / sysctl.d).", + ), + "system_modprobe": ReasonInfo( + "modprobe configuration", + "Kernel module configuration (modprobe.d).", + ), + "system_mounts": ReasonInfo( + "Mount configuration", + "Mount configuration (e.g. /etc/fstab and related).", + ), + "system_rc": ReasonInfo( + "Startup/rc configuration", + "Startup scripts / rc configuration that can affect boot behavior.", + ), + # systemd + timers + "systemd_dropin": ReasonInfo( + "systemd drop-in", + "systemd override/drop-in files that modify a unit's behavior.", + ), + "systemd_envfile": ReasonInfo( + "systemd EnvironmentFile", + "Files referenced by systemd units via EnvironmentFile.", + ), + "related_timer": ReasonInfo( + "Related systemd timer", + "A systemd timer captured because it is related to a unit/service.", + ), + # cron / logrotate + "system_cron": ReasonInfo( + "System cron", + "System cron configuration (crontab, cron.d, etc.).", + ), + "cron_snippet": ReasonInfo( + "Cron snippet", + "Cron snippets referenced/used by harvested services or configs.", + ), + "system_logrotate": ReasonInfo( + "System logrotate", + "System logrotate configuration.", + ), + "logrotate_snippet": ReasonInfo( + "logrotate snippet", + "logrotate snippets/configs referenced in system configuration.", + ), + # Custom paths / drift signals + "modified_conffile": ReasonInfo( + "Modified package conffile", + "A package-managed conffile differs from the packaged/default version.", + ), + "modified_packaged_file": ReasonInfo( + "Modified packaged file", + "A file owned by a package differs from the packaged version.", + ), + "custom_unowned": ReasonInfo( + "Unowned custom file", + "A file not owned by any package (often custom/operator-managed).", + ), + "custom_specific_path": ReasonInfo( + "Custom specific path", + "A specific path included by a custom rule or snapshot.", + ), + "usr_local_bin_script": ReasonInfo( + "/usr/local/bin script", + "Executable scripts under /usr/local/bin (often operator-installed).", + ), + "usr_local_etc_custom": ReasonInfo( + "/usr/local/etc custom", + "Custom configuration under /usr/local/etc.", + ), + # User includes + "user_include": ReasonInfo( + "User-included path", + "Included because you specified it via --include-path / include patterns.", + ), +} + +_MANAGED_DIR_REASONS: Dict[str, ReasonInfo] = { + "parent_of_managed_file": ReasonInfo( + "Parent directory", + "Included so permissions/ownership can be recreated for managed files.", + ), + "user_include_dir": ReasonInfo( + "User-included directory", + "Included because you specified it via --include-path / include patterns.", + ), +} + +_EXCLUDED_REASONS: Dict[str, ReasonInfo] = { + "user_excluded": ReasonInfo( + "User excluded", + "Excluded because you explicitly excluded it (e.g. --exclude-path / patterns).", + ), + "unreadable": ReasonInfo( + "Unreadable", + "Enroll could not read this path with the permissions it had.", + ), + "log_file": ReasonInfo( + "Log file", + "Excluded because it appears to be a log file (usually noisy/large).", + ), + "denied_path": ReasonInfo( + "Denied path", + "Excluded because the path is in a denylist for safety.", + ), + "too_large": ReasonInfo( + "Too large", + "Excluded because it exceeded the size limit for harvested files.", + ), + "not_regular_file": ReasonInfo( + "Not a regular file", + "Excluded because it was not a regular file (device, socket, etc.).", + ), + "binary_like": ReasonInfo( + "Binary-like", + "Excluded because it looked like binary content (not useful for config management).", + ), + "sensitive_content": ReasonInfo( + "Sensitive content", + "Excluded because it likely contains secrets (e.g. shadow, private keys).", + ), +} + +_OBSERVED_VIA: Dict[str, ReasonInfo] = { + "user_installed": ReasonInfo( + "User-installed", + "Package appears explicitly installed (as opposed to only pulled in as a dependency).", + ), + "systemd_unit": ReasonInfo( + "Referenced by systemd unit", + "Package is associated with a systemd unit that was harvested.", + ), + "package_role": ReasonInfo( + "Referenced by package role", + "Package was referenced by an enroll packages snapshot/role.", + ), +} + + +def _ri(mapping: Dict[str, ReasonInfo], key: str) -> ReasonInfo: + return mapping.get(key) or ReasonInfo(key, f"Captured with reason '{key}'") + + +def _role_common_counts(role_obj: Dict[str, Any]) -> Tuple[int, int, int, int]: + """Return (managed_files, managed_dirs, excluded, notes) counts for a RoleCommon object.""" + mf = len(role_obj.get("managed_files") or []) + md = len(role_obj.get("managed_dirs") or []) + ex = len(role_obj.get("excluded") or []) + nt = len(role_obj.get("notes") or []) + return mf, md, ex, nt + + +def _summarize_reasons( + items: Iterable[Dict[str, Any]], + reason_key: str, + *, + mapping: Dict[str, ReasonInfo], + max_examples: int, +) -> List[Dict[str, Any]]: + by_reason: Dict[str, List[str]] = defaultdict(list) + counts: Counter[str] = Counter() + + for it in items: + if not isinstance(it, dict): + continue + r = it.get(reason_key) + if not r: + continue + r = str(r) + counts[r] += 1 + p = it.get("path") + if ( + max_examples > 0 + and isinstance(p, str) + and p + and len(by_reason[r]) < max_examples + ): + by_reason[r].append(p) + + out: List[Dict[str, Any]] = [] + for reason, count in counts.most_common(): + info = _ri(mapping, reason) + out.append( + { + "reason": reason, + "count": count, + "title": info.title, + "why": info.why, + "examples": by_reason.get(reason, []), + } + ) + return out + + +def explain_state( + harvest: str, + *, + sops_mode: bool = False, + fmt: str = "text", + max_examples: int = 3, +) -> str: + """Explain a harvest bundle's state.json. + + `harvest` may be: + - a bundle directory + - a path to state.json + - a tarball (.tar.gz/.tgz) + - a SOPS-encrypted bundle (.sops) + """ + bundle = _bundle_from_input(harvest, sops_mode=sops_mode) + state = _load_state(bundle.dir) + + host = state.get("host") or {} + enroll = state.get("enroll") or {} + roles = state.get("roles") or {} + inv = state.get("inventory") or {} + inv_pkgs = (inv.get("packages") or {}) if isinstance(inv, dict) else {} + + role_summaries: List[Dict[str, Any]] = [] + + # Users + users_obj = roles.get("users") or {} + user_entries = users_obj.get("users") or [] + mf, md, ex, _nt = ( + _role_common_counts(users_obj) if isinstance(users_obj, dict) else (0, 0, 0, 0) + ) + role_summaries.append( + { + "role": "users", + "summary": f"{len(user_entries)} user(s), {mf} file(s), {ex} excluded", + "notes": users_obj.get("notes") or [], + } + ) + + # Services + services_list = roles.get("services") or [] + if isinstance(services_list, list): + total_mf = sum( + len((s.get("managed_files") or [])) + for s in services_list + if isinstance(s, dict) + ) + total_ex = sum( + len((s.get("excluded") or [])) for s in services_list if isinstance(s, dict) + ) + role_summaries.append( + { + "role": "services", + "summary": f"{len(services_list)} unit(s), {total_mf} file(s), {total_ex} excluded", + "units": [ + { + "unit": s.get("unit"), + "active_state": s.get("active_state"), + "sub_state": s.get("sub_state"), + "unit_file_state": s.get("unit_file_state"), + "condition_result": s.get("condition_result"), + } + for s in services_list + if isinstance(s, dict) + ], + } + ) + + # Package snapshots + pkgs_list = roles.get("packages") or [] + if isinstance(pkgs_list, list): + total_mf = sum( + len((p.get("managed_files") or [])) + for p in pkgs_list + if isinstance(p, dict) + ) + total_ex = sum( + len((p.get("excluded") or [])) for p in pkgs_list if isinstance(p, dict) + ) + role_summaries.append( + { + "role": "packages", + "summary": f"{len(pkgs_list)} package snapshot(s), {total_mf} file(s), {total_ex} excluded", + "packages": [ + p.get("package") for p in pkgs_list if isinstance(p, dict) + ], + } + ) + + # Single snapshots + for rname in [ + "apt_config", + "dnf_config", + "etc_custom", + "usr_local_custom", + "extra_paths", + ]: + robj = roles.get(rname) or {} + if not isinstance(robj, dict): + continue + mf, md, ex, _nt = _role_common_counts(robj) + extra: Dict[str, Any] = {} + if rname == "extra_paths": + extra = { + "include_patterns": robj.get("include_patterns") or [], + "exclude_patterns": robj.get("exclude_patterns") or [], + } + role_summaries.append( + { + "role": rname, + "summary": f"{mf} file(s), {md} dir(s), {ex} excluded", + "notes": robj.get("notes") or [], + **extra, + } + ) + + # Flatten managed/excluded across roles + all_managed_files: List[Dict[str, Any]] = [] + all_managed_dirs: List[Dict[str, Any]] = [] + all_excluded: List[Dict[str, Any]] = [] + + def _consume_role(role_obj: Dict[str, Any]) -> None: + for f in role_obj.get("managed_files") or []: + if isinstance(f, dict): + all_managed_files.append(f) + for d in role_obj.get("managed_dirs") or []: + if isinstance(d, dict): + all_managed_dirs.append(d) + for e in role_obj.get("excluded") or []: + if isinstance(e, dict): + all_excluded.append(e) + + if isinstance(users_obj, dict): + _consume_role(users_obj) + if isinstance(services_list, list): + for s in services_list: + if isinstance(s, dict): + _consume_role(s) + if isinstance(pkgs_list, list): + for p in pkgs_list: + if isinstance(p, dict): + _consume_role(p) + for rname in [ + "apt_config", + "dnf_config", + "etc_custom", + "usr_local_custom", + "extra_paths", + ]: + robj = roles.get(rname) + if isinstance(robj, dict): + _consume_role(robj) + + managed_file_reasons = _summarize_reasons( + all_managed_files, + "reason", + mapping=_MANAGED_FILE_REASONS, + max_examples=max_examples, + ) + managed_dir_reasons = _summarize_reasons( + all_managed_dirs, + "reason", + mapping=_MANAGED_DIR_REASONS, + max_examples=max_examples, + ) + excluded_reasons = _summarize_reasons( + all_excluded, + "reason", + mapping=_EXCLUDED_REASONS, + max_examples=max_examples, + ) + + # Inventory observed_via breakdown (count packages that contain at least one entry for that kind) + observed_kinds: Counter[str] = Counter() + observed_refs: Dict[str, Counter[str]] = defaultdict(Counter) + for _pkg, entry in inv_pkgs.items(): + if not isinstance(entry, dict): + continue + seen_kinds = set() + for ov in entry.get("observed_via") or []: + if not isinstance(ov, dict): + continue + kind = ov.get("kind") + if not kind: + continue + kind = str(kind) + seen_kinds.add(kind) + ref = ov.get("ref") + if isinstance(ref, str) and ref: + observed_refs[kind][ref] += 1 + for k in seen_kinds: + observed_kinds[k] += 1 + + observed_via_summary: List[Dict[str, Any]] = [] + for kind, cnt in observed_kinds.most_common(): + info = _ri(_OBSERVED_VIA, kind) + top_refs = [ + r for r, _ in observed_refs.get(kind, Counter()).most_common(max_examples) + ] + observed_via_summary.append( + { + "kind": kind, + "count": cnt, + "title": info.title, + "why": info.why, + "top_refs": top_refs, + } + ) + + report: Dict[str, Any] = { + "bundle_dir": str(bundle.dir), + "host": host, + "enroll": enroll, + "inventory": { + "package_count": len(inv_pkgs), + "observed_via": observed_via_summary, + }, + "roles": role_summaries, + "reasons": { + "managed_files": managed_file_reasons, + "managed_dirs": managed_dir_reasons, + "excluded": excluded_reasons, + }, + } + + if fmt == "json": + return json.dumps(report, indent=2, sort_keys=True) + + # Text rendering + out: List[str] = [] + out.append(f"Enroll explained: {harvest}") + hn = host.get("hostname") or "(unknown host)" + os_family = host.get("os") or "unknown" + pkg_backend = host.get("pkg_backend") or "?" + ver = enroll.get("version") or "?" + out.append(f"Host: {hn} (os: {os_family}, pkg: {pkg_backend})") + out.append(f"Enroll: {ver}") + out.append("") + + out.append("Inventory") + out.append(f"- Packages: {len(inv_pkgs)}") + if observed_via_summary: + out.append("- Why packages were included (observed_via):") + for ov in observed_via_summary: + extra = "" + if ov.get("top_refs"): + extra = f" (e.g. {', '.join(ov['top_refs'])})" + out.append(f" - {ov['kind']}: {ov['count']} – {ov['why']}{extra}") + out.append("") + + out.append("Roles collected") + for rs in role_summaries: + out.append(f"- {rs['role']}: {rs['summary']}") + if rs["role"] == "extra_paths": + inc = rs.get("include_patterns") or [] + exc = rs.get("exclude_patterns") or [] + if inc: + suffix = "…" if len(inc) > max_examples else "" + out.append( + f" include_patterns: {', '.join(map(str, inc[:max_examples]))}{suffix}" + ) + if exc: + suffix = "…" if len(exc) > max_examples else "" + out.append( + f" exclude_patterns: {', '.join(map(str, exc[:max_examples]))}{suffix}" + ) + notes = rs.get("notes") or [] + if notes: + for n in notes[:max_examples]: + out.append(f" note: {n}") + if len(notes) > max_examples: + out.append( + f" note: (+{len(notes) - max_examples} more. Use --format json to see them all)" + ) + out.append("") + + out.append("Why files were included (managed_files.reason)") + if managed_file_reasons: + for r in managed_file_reasons[:15]: + exs = r.get("examples") or [] + ex_txt = f" Examples: {', '.join(exs)}" if exs else "" + out.append(f"- {r['reason']} ({r['count']}): {r['why']}.{ex_txt}") + if len(managed_file_reasons) > 15: + out.append( + f"- (+{len(managed_file_reasons) - 15} more reasons. Use --format json to see them all)" + ) + else: + out.append("- (no managed files)") + + if managed_dir_reasons: + out.append("") + out.append("Why directories were included (managed_dirs.reason)") + for r in managed_dir_reasons: + out.append(f"- {r['reason']} ({r['count']}): {r['why']}") + + out.append("") + out.append("Why paths were excluded") + if excluded_reasons: + for r in excluded_reasons: + exs = r.get("examples") or [] + ex_txt = f" Examples: {', '.join(exs)}" if exs else "" + out.append(f"- {r['reason']} ({r['count']}): {r['why']}.{ex_txt}") + else: + out.append("- (no excluded paths)") + + return "\n".join(out) + "\n" diff --git a/tests.sh b/tests.sh index 6becc39..0dc50dc 100755 --- a/tests.sh +++ b/tests.sh @@ -15,8 +15,24 @@ poetry run \ --harvest "${BUNDLE_DIR}" \ --out "${ANSIBLE_DIR}" -builtin cd "${ANSIBLE_DIR}" +# Analyse +poetry run \ + enroll explain "${BUNDLE_DIR}" +poetry run \ + enroll explain "${BUNDLE_DIR}" --format json | jq +# Install something, harvest again and diff the harvests +sudo apt-get -y install cowsay +poetry run \ + enroll harvest --out "${BUNDLE_DIR}2" +poetry run \ + enroll diff \ + --old "${BUNDLE_DIR}" \ + --new "${BUNDLE_DIR}2" \ + --format json | jq + +# Ansible test +builtin cd "${ANSIBLE_DIR}" # Lint ansible-lint "${ANSIBLE_DIR}" From c9003d589db02da5e58ee30153855588b5d10155 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 5 Jan 2026 10:23:15 +1100 Subject: [PATCH 03/33] Fix test. Update README --- README.md | 106 +++++++++++++++++++++++++++++++----------------------- tests.sh | 2 +- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 565c4e8..55d87cb 100644 --- a/README.md +++ b/README.md @@ -154,51 +154,6 @@ Provide either the path to the harvest or the path to its state.json. It can als Output can be provided in plaintext or json. -**Examples**: - -``` -enroll explain /path/to/state.json -enroll explain /path/to/bundle_dir -enroll explain /path/to/harvest.tar.gz -enroll explain /path/to/harvest.tar.gz.sops --sops -enroll explain /path/to/state.json --format json --max-examples 5 -``` - -**Example output**: - -``` -❯ poetry run enroll explain /tmp/syrah.harvest -Enroll explain: /tmp/syrah.harvest -Host: syrah.mig5.net (os: debian, pkg: dpkg) -Enroll: 0.2.3 - -Inventory -- Packages: 254 -- Why packages were included (observed_via): - - user_installed: 248 – Package appears explicitly installed (as opposed to only pulled in as a dependency). - - package_role: 232 – Package was referenced by an enroll packages snapshot/role. (e.g. acl, acpid, adduser) - - systemd_unit: 22 – Package is associated with a systemd unit that was harvested. (e.g. postfix.service, tor.service, apparmor.service) - -Roles collected -- users: 1 user(s), 1 file(s), 0 excluded -- services: 19 unit(s), 111 file(s), 6 excluded -- packages: 232 package snapshot(s), 41 file(s), 0 excluded -- apt_config: 26 file(s), 7 dir(s), 10 excluded -- dnf_config: 0 file(s), 0 dir(s), 0 excluded -- etc_custom: 70 file(s), 20 dir(s), 0 excluded -- usr_local_custom: 35 file(s), 1 dir(s), 0 excluded -- extra_paths: 0 file(s), 0 dir(s), 0 excluded - -Why files were included (managed_files.reason) -- custom_unowned (179): A file not owned by any package (often custom/operator-managed).. Examples: /etc/apparmor.d/local/lsb_release, /etc/apparmor.d/local/nvidia_modprobe, /etc/apparmor.d/local/sbin.dhclient -- usr_local_bin_script (35): Executable scripts under /usr/local/bin (often operator-installed).. Examples: /usr/local/bin/check_firewall, /usr/local/bin/awslogs -- apt_keyring (13): Repository signing key material used by APT.. Examples: /etc/apt/keyrings/openvpn-repo-public.asc, /etc/apt/trusted.gpg, /etc/apt/trusted.gpg.d/deb.torproject.org-keyring.gpg -- modified_conffile (10): A package-managed conffile differs from the packaged/default version.. Examples: /etc/dnsmasq.conf, /etc/ssh/moduli, /etc/tor/torrc -- logrotate_snippet (9): logrotate snippets/configs referenced in system configuration.. Examples: /etc/logrotate.d/rsyslog, /etc/logrotate.d/tor, /etc/logrotate.d/apt -- apt_config (7): APT configuration affecting package installation and repository behavior.. Examples: /etc/apt/apt.conf.d/01autoremove, /etc/apt/apt.conf.d/20listchanges, /etc/apt/apt.conf.d/70debconf -[...] -``` - --- ## Sensitive data @@ -402,6 +357,67 @@ enroll diff --old /path/to/golden/harvest --new /path/to/new/harvest --web --- +## Explain + +### Explain a harvest + +All of these do the same thing: + +```bash +enroll explain /path/to/state.json +enroll explain /path/to/bundle_dir +enroll explain /path/to/harvest.tar.gz +``` + +### Explain a SOPS-encrypted harvest + +```bash +enroll explain /path/to/harvest.tar.gz.sops --sops +``` + +### Explain with JSON output and more examples + +```bash +enroll explain /path/to/state.json --format json --max-examples 25 +``` + +### Example output + +``` +❯ enroll explain /tmp/syrah.harvest +Enroll explain: /tmp/syrah.harvest +Host: syrah.mig5.net (os: debian, pkg: dpkg) +Enroll: 0.2.3 + +Inventory +- Packages: 254 +- Why packages were included (observed_via): + - user_installed: 248 – Package appears explicitly installed (as opposed to only pulled in as a dependency). + - package_role: 232 – Package was referenced by an enroll packages snapshot/role. (e.g. acl, acpid, adduser) + - systemd_unit: 22 – Package is associated with a systemd unit that was harvested. (e.g. postfix.service, tor.service, apparmor.service) + +Roles collected +- users: 1 user(s), 1 file(s), 0 excluded +- services: 19 unit(s), 111 file(s), 6 excluded +- packages: 232 package snapshot(s), 41 file(s), 0 excluded +- apt_config: 26 file(s), 7 dir(s), 10 excluded +- dnf_config: 0 file(s), 0 dir(s), 0 excluded +- etc_custom: 70 file(s), 20 dir(s), 0 excluded +- usr_local_custom: 35 file(s), 1 dir(s), 0 excluded +- extra_paths: 0 file(s), 0 dir(s), 0 excluded + +Why files were included (managed_files.reason) +- custom_unowned (179): A file not owned by any package (often custom/operator-managed).. Examples: /etc/apparmor.d/local/lsb_release, /etc/apparmor.d/local/nvidia_modprobe, /etc/apparmor.d/local/sbin.dhclient +- usr_local_bin_script (35): Executable scripts under /usr/local/bin (often operator-installed).. Examples: /usr/local/bin/check_firewall, /usr/local/bin/awslogs +- apt_keyring (13): Repository signing key material used by APT.. Examples: /etc/apt/keyrings/openvpn-repo-public.asc, /etc/apt/trusted.gpg, /etc/apt/trusted.gpg.d/deb.torproject.org-keyring.gpg +- modified_conffile (10): A package-managed conffile differs from the packaged/default version.. Examples: /etc/dnsmasq.conf, /etc/ssh/moduli, /etc/tor/torrc +- logrotate_snippet (9): logrotate snippets/configs referenced in system configuration.. Examples: /etc/logrotate.d/rsyslog, /etc/logrotate.d/tor, /etc/logrotate.d/apt +- apt_config (7): APT configuration affecting package installation and repository behavior.. Examples: /etc/apt/apt.conf.d/01autoremove, /etc/apt/apt.conf.d/20listchanges, /etc/apt/apt.conf.d/70debconf +[...] +``` + +--- + ## Run Ansible ### Single-site diff --git a/tests.sh b/tests.sh index 0dc50dc..23fe30b 100755 --- a/tests.sh +++ b/tests.sh @@ -22,7 +22,7 @@ poetry run \ enroll explain "${BUNDLE_DIR}" --format json | jq # Install something, harvest again and diff the harvests -sudo apt-get -y install cowsay +DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cowsay poetry run \ enroll harvest --out "${BUNDLE_DIR}2" poetry run \ From 24cedc8c8df6d8f00a4b48f3b6393077cb5b45ef Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 5 Jan 2026 12:01:25 +1100 Subject: [PATCH 04/33] Centralise the cron and logrotate stuff into their respective roles. We had a bit of duplication between roles based on harvest discovery. Arguably some crons/logrotate scripts are specific to other packages, but it helps to go to one place to find them all. We'll apply these roles last in the playbook, to give an opportunity for all other packages / non-system users to have been installed already. --- CHANGELOG.md | 1 + enroll/harvest.py | 174 +++++++++++++++++++++++++++++++++++++++++---- enroll/manifest.py | 13 +++- 3 files changed, 175 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f6e465..c687249 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.3.0 * Introduce `enroll explain` - a tool to analyze and explain what's in (or not in) a harvest and why. + * Centralise the cron and logrotate stuff into their respective roles, we had a bit of duplication between roles based on harvest discovery. # 0.2.3 diff --git a/enroll/harvest.py b/enroll/harvest.py index 7aba7c6..6ecf676 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -490,23 +490,12 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ # mounts ("/etc/fstab", "system_mounts"), ("/etc/crypttab", "system_mounts"), - # logrotate - ("/etc/logrotate.conf", "system_logrotate"), - ("/etc/logrotate.d/*", "system_logrotate"), # sysctl / modules ("/etc/sysctl.conf", "system_sysctl"), ("/etc/sysctl.d/*", "system_sysctl"), ("/etc/modprobe.d/*", "system_modprobe"), ("/etc/modules", "system_modprobe"), ("/etc/modules-load.d/*", "system_modprobe"), - # cron - ("/etc/crontab", "system_cron"), - ("/etc/cron.d/*", "system_cron"), - ("/etc/anacrontab", "system_cron"), - ("/etc/anacron/*", "system_cron"), - ("/var/spool/cron/crontabs/*", "system_cron"), - ("/var/spool/crontabs/*", "system_cron"), - ("/var/spool/cron/*", "system_cron"), # network ("/etc/netplan/*", "system_network"), ("/etc/systemd/network/*", "system_network"), @@ -762,6 +751,135 @@ def harvest( # This avoids multiple Ansible roles managing the same destination file. captured_global: Set[str] = set() + # ------------------------- + # Cron / logrotate unification + # + # If cron/logrotate are installed, capture all related configuration/state into + # dedicated package roles ("cron" and "logrotate") so the same destination path + # is never managed by unrelated roles. + # + # This includes user-specific crontabs under /var/spool, which means the cron role + # should be applied after users have been created (handled in manifest ordering). + # ------------------------- + + installed_pkgs = backend.installed_packages() or {} + installed_names: Set[str] = set(installed_pkgs.keys()) + + def _pick_installed(cands: List[str]) -> Optional[str]: + for c in cands: + if c in installed_names: + return c + return None + + cron_pkg = _pick_installed( + ["cron", "cronie", "cronie-anacron", "vixie-cron", "fcron"] + ) + logrotate_pkg = _pick_installed(["logrotate"]) + + cron_role_name = "cron" + logrotate_role_name = "logrotate" + + def _is_cron_path(p: str) -> bool: + return ( + p == "/etc/crontab" + or p == "/etc/anacrontab" + or p in ("/etc/cron.allow", "/etc/cron.deny") + or p.startswith("/etc/cron.") + or p.startswith("/etc/cron.d/") + or p.startswith("/etc/anacron/") + or p.startswith("/var/spool/cron/") + or p.startswith("/var/spool/crontabs/") + or p.startswith("/var/spool/anacron/") + ) + + def _is_logrotate_path(p: str) -> bool: + return p == "/etc/logrotate.conf" or p.startswith("/etc/logrotate.d/") + + cron_snapshot: Optional[PackageSnapshot] = None + logrotate_snapshot: Optional[PackageSnapshot] = None + + if cron_pkg: + cron_managed: List[ManagedFile] = [] + cron_excluded: List[ExcludedFile] = [] + cron_notes: List[str] = [] + cron_seen: Set[str] = set() + + cron_globs = [ + "/etc/crontab", + "/etc/cron.d/*", + "/etc/cron.hourly/*", + "/etc/cron.daily/*", + "/etc/cron.weekly/*", + "/etc/cron.monthly/*", + "/etc/cron.allow", + "/etc/cron.deny", + "/etc/anacrontab", + "/etc/anacron/*", + # user crontabs / spool state + "/var/spool/cron/*", + "/var/spool/cron/crontabs/*", + "/var/spool/crontabs/*", + "/var/spool/anacron/*", + ] + for spec in cron_globs: + for path in _iter_matching_files(spec): + if not os.path.isfile(path) or os.path.islink(path): + continue + _capture_file( + bundle_dir=bundle_dir, + role_name=cron_role_name, + abs_path=path, + reason="system_cron", + policy=policy, + path_filter=path_filter, + managed_out=cron_managed, + excluded_out=cron_excluded, + seen_role=cron_seen, + seen_global=captured_global, + ) + + cron_snapshot = PackageSnapshot( + package=cron_pkg, + role_name=cron_role_name, + managed_files=cron_managed, + excluded=cron_excluded, + notes=cron_notes, + ) + + if logrotate_pkg: + lr_managed: List[ManagedFile] = [] + lr_excluded: List[ExcludedFile] = [] + lr_notes: List[str] = [] + lr_seen: Set[str] = set() + + lr_globs = [ + "/etc/logrotate.conf", + "/etc/logrotate.d/*", + ] + for spec in lr_globs: + for path in _iter_matching_files(spec): + if not os.path.isfile(path) or os.path.islink(path): + continue + _capture_file( + bundle_dir=bundle_dir, + role_name=logrotate_role_name, + abs_path=path, + reason="system_logrotate", + policy=policy, + path_filter=path_filter, + managed_out=lr_managed, + excluded_out=lr_excluded, + seen_role=lr_seen, + seen_global=captured_global, + ) + + logrotate_snapshot = PackageSnapshot( + package=logrotate_pkg, + role_name=logrotate_role_name, + managed_files=lr_managed, + excluded=lr_excluded, + notes=lr_notes, + ) # ------------------------- # Service roles # ------------------------- @@ -777,6 +895,17 @@ def harvest( excluded_by_role: Dict[str, List[ExcludedFile]] = {} enabled_services = list_enabled_services() + + # Avoid role-name collisions with dedicated cron/logrotate package roles. + if cron_snapshot is not None or logrotate_snapshot is not None: + blocked_roles = set() + if cron_snapshot is not None: + blocked_roles.add(cron_role_name) + if logrotate_snapshot is not None: + blocked_roles.add(logrotate_role_name) + enabled_services = [ + u for u in enabled_services if _role_name_from_unit(u) not in blocked_roles + ] enabled_set = set(enabled_services) def _service_sort_key(unit: str) -> tuple[int, str, str]: @@ -886,6 +1015,10 @@ def harvest( for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue + if cron_snapshot is not None and _is_cron_path(path): + continue + if logrotate_snapshot is not None and _is_logrotate_path(path): + continue if backend.is_pkg_config_path(path): continue candidates.setdefault(path, reason) @@ -1074,7 +1207,20 @@ def harvest( manual_pkgs_skipped: List[str] = [] pkg_snaps: List[PackageSnapshot] = [] + # Add dedicated cron/logrotate roles (if detected) as package roles. + # These roles centralise all cron/logrotate managed files so they aren't scattered + # across unrelated roles. + if cron_snapshot is not None: + pkg_snaps.append(cron_snapshot) + if logrotate_snapshot is not None: + pkg_snaps.append(logrotate_snapshot) for pkg in sorted(manual_pkgs): + if cron_snapshot is not None and pkg == cron_pkg: + manual_pkgs_skipped.append(pkg) + continue + if logrotate_snapshot is not None and pkg == logrotate_pkg: + manual_pkgs_skipped.append(pkg) + continue if pkg in covered_by_services: manual_pkgs_skipped.append(pkg) continue @@ -1091,6 +1237,10 @@ def harvest( for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue + if cron_snapshot is not None and _is_cron_path(path): + continue + if logrotate_snapshot is not None and _is_logrotate_path(path): + continue if backend.is_pkg_config_path(path): continue candidates.setdefault(path, reason) @@ -1693,7 +1843,7 @@ def harvest( # ------------------------- # Inventory: packages (SBOM-ish) # ------------------------- - installed = backend.installed_packages() or {} + installed = installed_pkgs manual_set: Set[str] = set(manual_pkgs or []) diff --git a/enroll/manifest.py b/enroll/manifest.py index f30e5f3..ea38e98 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -1930,15 +1930,26 @@ Generated for package `{pkg}`. f.write(readme) manifested_pkg_roles.append(role) + # Place cron/logrotate at the end of the playbook so: + # - users exist before we restore per-user crontabs in /var/spool + # - most packages/services are installed/configured first + tail_roles: List[str] = [] + for r in ("cron", "logrotate"): + if r in manifested_pkg_roles: + tail_roles.append(r) + + main_pkg_roles = [r for r in manifested_pkg_roles if r not in set(tail_roles)] + all_roles = ( manifested_apt_config_roles + manifested_dnf_config_roles - + manifested_pkg_roles + + main_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles + manifested_extra_paths_roles + manifested_users_roles + + tail_roles ) if site_mode: From e68ec0bffc9473764c57a0e0f1556f9e8438c16b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 5 Jan 2026 14:27:56 +1100 Subject: [PATCH 05/33] More test coverage --- enroll/cli.py | 50 ----- tests.sh | 7 +- tests/test_cli.py | 290 +++++++++++++++++++++++- tests/test_explain.py | 222 ++++++++++++++++++ tests/test_harvest_cron_logrotate.py | 164 ++++++++++++++ tests/test_harvest_helpers.py | 170 ++++++++++++++ tests/test_manifest.py | 301 ++++++++++++++++++++++++- tests/test_misc_coverage.py | 320 ++++++++++++++++++++++++++ tests/test_more_coverage.py | 323 --------------------------- tests/test_pathfilter.py | 106 +++++++++ tests/test_rpm.py | 47 ++++ tests/test_rpm_run.py | 31 +++ 12 files changed, 1650 insertions(+), 381 deletions(-) create mode 100644 tests/test_explain.py create mode 100644 tests/test_harvest_cron_logrotate.py create mode 100644 tests/test_harvest_helpers.py delete mode 100644 tests/test_more_coverage.py create mode 100644 tests/test_rpm_run.py diff --git a/enroll/cli.py b/enroll/cli.py index 5624587..829a4ac 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -914,56 +914,6 @@ def main() -> None: fqdn=args.fqdn, jinjaturtle=_jt_mode(args), ) - elif args.cmd == "diff": - report, has_changes = compare_harvests( - args.old, args.new, sops_mode=bool(getattr(args, "sops", False)) - ) - - rendered = format_report(report, fmt=str(args.format)) - if args.out: - Path(args.out).expanduser().write_text(rendered, encoding="utf-8") - else: - print(rendered, end="") - - do_notify = bool(has_changes or getattr(args, "notify_always", False)) - - if do_notify and getattr(args, "webhook", None): - wf = str(getattr(args, "webhook_format", "json")) - body = format_report(report, fmt=wf).encode("utf-8") - headers = {"User-Agent": "enroll"} - if wf == "json": - headers["Content-Type"] = "application/json" - else: - headers["Content-Type"] = "text/plain; charset=utf-8" - for hv in getattr(args, "webhook_header", []) or []: - if ":" not in hv: - raise SystemExit( - "error: --webhook-header must be in the form 'K:V'" - ) - k, v = hv.split(":", 1) - headers[k.strip()] = v.strip() - status, _ = post_webhook(str(args.webhook), body, headers=headers) - if status and status >= 400: - raise SystemExit(f"error: webhook returned HTTP {status}") - - if do_notify and (getattr(args, "email_to", []) or []): - subject = getattr(args, "email_subject", None) or "enroll diff report" - smtp_password = None - pw_env = getattr(args, "smtp_password_env", None) - if pw_env: - smtp_password = os.environ.get(str(pw_env)) - send_email( - to_addrs=list(getattr(args, "email_to", []) or []), - subject=str(subject), - body=rendered, - from_addr=getattr(args, "email_from", None), - smtp=getattr(args, "smtp", None), - smtp_user=getattr(args, "smtp_user", None), - smtp_password=smtp_password, - ) - - if getattr(args, "exit_code", False) and has_changes: - raise SystemExit(2) except RemoteSudoPasswordRequired: raise SystemExit( "error: remote sudo requires a password. Re-run with --ask-become-pass." diff --git a/tests.sh b/tests.sh index 23fe30b..23c5ce1 100755 --- a/tests.sh +++ b/tests.sh @@ -27,9 +27,10 @@ poetry run \ enroll harvest --out "${BUNDLE_DIR}2" poetry run \ enroll diff \ - --old "${BUNDLE_DIR}" \ - --new "${BUNDLE_DIR}2" \ - --format json | jq + --old "${BUNDLE_DIR}" \ + --new "${BUNDLE_DIR}2" \ + --format json | jq +DEBIAN_FRONTEND=noninteractive apt-get remove --purge cowsay # Ansible test builtin cd "${ANSIBLE_DIR}" diff --git a/tests/test_cli.py b/tests/test_cli.py index 5fc9a66..e5c6966 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,14 @@ +from __future__ import annotations import sys import pytest - import enroll.cli as cli +from pathlib import Path + +from enroll.remote import RemoteSudoPasswordRequired +from enroll.sopsutil import SopsError + def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): called = {} @@ -398,3 +403,286 @@ def test_cli_manifest_common_args(monkeypatch, tmp_path): cli.main() assert called["fqdn"] == "example.test" assert called["jinjaturtle"] == "off" + + +def test_cli_explain_passes_args_and_writes_stdout(monkeypatch, capsys, tmp_path): + called = {} + + def fake_explain_state( + harvest: str, + *, + sops_mode: bool = False, + fmt: str = "text", + max_examples: int = 3, + ): + called["harvest"] = harvest + called["sops_mode"] = sops_mode + called["fmt"] = fmt + called["max_examples"] = max_examples + return "EXPLAINED\n" + + monkeypatch.setattr(cli, "explain_state", fake_explain_state) + + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "explain", + "--sops", + "--format", + "json", + "--max-examples", + "7", + str(tmp_path / "bundle" / "state.json"), + ], + ) + + cli.main() + out = capsys.readouterr().out + assert out == "EXPLAINED\n" + assert called["sops_mode"] is True + assert called["fmt"] == "json" + assert called["max_examples"] == 7 + + +def test_discover_config_path_missing_config_value_returns_none(monkeypatch): + # Covers the "--config" flag present with no value. + monkeypatch.delenv("ENROLL_CONFIG", raising=False) + monkeypatch.delenv("XDG_CONFIG_HOME", raising=False) + assert cli._discover_config_path(["--config"]) is None + + +def test_discover_config_path_defaults_to_home_config(monkeypatch, tmp_path: Path): + # Covers the Path.home() / ".config" fallback. + monkeypatch.delenv("ENROLL_CONFIG", raising=False) + monkeypatch.delenv("XDG_CONFIG_HOME", raising=False) + monkeypatch.setattr(cli.Path, "home", lambda: tmp_path) + monkeypatch.setattr(cli.Path, "cwd", lambda: tmp_path) + + cp = tmp_path / ".config" / "enroll" / "enroll.ini" + cp.parent.mkdir(parents=True) + cp.write_text("[enroll]\n", encoding="utf-8") + + assert cli._discover_config_path(["harvest"]) == cp + + +def test_cli_harvest_local_sops_encrypts_and_prints_path( + monkeypatch, tmp_path: Path, capsys +): + out_dir = tmp_path / "out" + out_dir.mkdir() + calls: dict[str, object] = {} + + def fake_harvest(bundle_dir: str, **kwargs): + calls["bundle"] = bundle_dir + # Create a minimal state.json so tooling that expects it won't break. + Path(bundle_dir).mkdir(parents=True, exist_ok=True) + (Path(bundle_dir) / "state.json").write_text("{}", encoding="utf-8") + return str(Path(bundle_dir) / "state.json") + + def fake_encrypt(bundle_dir: Path, out_file: Path, fps: list[str]): + calls["encrypt"] = (bundle_dir, out_file, fps) + out_file.write_text("encrypted", encoding="utf-8") + return out_file + + monkeypatch.setattr(cli, "harvest", fake_harvest) + monkeypatch.setattr(cli, "_encrypt_harvest_dir_to_sops", fake_encrypt) + + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "harvest", + "--sops", + "ABCDEF", + "--out", + str(out_dir), + ], + ) + cli.main() + + printed = capsys.readouterr().out.strip() + assert printed.endswith("harvest.tar.gz.sops") + assert Path(printed).exists() + assert calls.get("encrypt") + + +def test_cli_harvest_remote_sops_encrypts_and_prints_path( + monkeypatch, tmp_path: Path, capsys +): + out_dir = tmp_path / "out" + out_dir.mkdir() + calls: dict[str, object] = {} + + def fake_remote_harvest(**kwargs): + calls["remote"] = kwargs + # Create a minimal state.json in the temp bundle. + out = Path(kwargs["local_out_dir"]) / "state.json" + out.write_text("{}", encoding="utf-8") + return out + + def fake_encrypt(bundle_dir: Path, out_file: Path, fps: list[str]): + calls["encrypt"] = (bundle_dir, out_file, fps) + out_file.write_text("encrypted", encoding="utf-8") + return out_file + + monkeypatch.setattr(cli, "remote_harvest", fake_remote_harvest) + monkeypatch.setattr(cli, "_encrypt_harvest_dir_to_sops", fake_encrypt) + + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "harvest", + "--remote-host", + "example.com", + "--remote-user", + "root", + "--sops", + "ABCDEF", + "--out", + str(out_dir), + ], + ) + cli.main() + + printed = capsys.readouterr().out.strip() + assert printed.endswith("harvest.tar.gz.sops") + assert Path(printed).exists() + assert calls.get("remote") + assert calls.get("encrypt") + + +def test_cli_harvest_remote_password_required_exits_cleanly(monkeypatch): + def boom(**kwargs): + raise RemoteSudoPasswordRequired("pw required") + + monkeypatch.setattr(cli, "remote_harvest", boom) + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "harvest", + "--remote-host", + "example.com", + "--remote-user", + "root", + ], + ) + with pytest.raises(SystemExit) as e: + cli.main() + assert "--ask-become-pass" in str(e.value) + + +def test_cli_runtime_error_is_wrapped_as_user_friendly_system_exit(monkeypatch): + def boom(*args, **kwargs): + raise RuntimeError("nope") + + monkeypatch.setattr(cli, "harvest", boom) + monkeypatch.setattr(sys, "argv", ["enroll", "harvest", "--out", "/tmp/x"]) + with pytest.raises(SystemExit) as e: + cli.main() + assert str(e.value) == "error: nope" + + +def test_cli_sops_error_is_wrapped_as_user_friendly_system_exit(monkeypatch): + def boom(*args, **kwargs): + raise SopsError("sops broke") + + monkeypatch.setattr(cli, "manifest", boom) + monkeypatch.setattr( + sys, "argv", ["enroll", "manifest", "--harvest", "/tmp/x", "--out", "/tmp/y"] + ) + with pytest.raises(SystemExit) as e: + cli.main() + assert str(e.value) == "error: sops broke" + + +def test_cli_diff_notifies_webhook_and_email_and_respects_exit_code( + monkeypatch, capsys +): + calls: dict[str, object] = {} + + def fake_compare(old, new, sops_mode=False): + calls["compare"] = (old, new, sops_mode) + return {"dummy": True}, True + + def fake_format(report, fmt="text"): + calls.setdefault("format", []).append((report, fmt)) + return "REPORT\n" + + def fake_post(url, body, headers=None): + calls["webhook"] = (url, body, headers) + return 200, b"ok" + + def fake_email(**kwargs): + calls["email"] = kwargs + + monkeypatch.setattr(cli, "compare_harvests", fake_compare) + monkeypatch.setattr(cli, "format_report", fake_format) + monkeypatch.setattr(cli, "post_webhook", fake_post) + monkeypatch.setattr(cli, "send_email", fake_email) + monkeypatch.setenv("SMTPPW", "secret") + + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "diff", + "--old", + "/tmp/old", + "--new", + "/tmp/new", + "--webhook", + "https://example.invalid/h", + "--webhook-header", + "X-Test: ok", + "--email-to", + "a@example.com", + "--smtp-password-env", + "SMTPPW", + "--exit-code", + ], + ) + + with pytest.raises(SystemExit) as e: + cli.main() + assert e.value.code == 2 + + assert calls.get("compare") + assert calls.get("webhook") + assert calls.get("email") + # No report printed when exiting via --exit-code? (we still render and print). + _ = capsys.readouterr() + + +def test_cli_diff_webhook_http_error_raises_system_exit(monkeypatch): + def fake_compare(old, new, sops_mode=False): + return {"dummy": True}, True + + monkeypatch.setattr(cli, "compare_harvests", fake_compare) + monkeypatch.setattr(cli, "format_report", lambda report, fmt="text": "R\n") + monkeypatch.setattr(cli, "post_webhook", lambda url, body, headers=None: (500, b"")) + + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "diff", + "--old", + "/tmp/old", + "--new", + "/tmp/new", + "--webhook", + "https://example.invalid/h", + ], + ) + with pytest.raises(SystemExit) as e: + cli.main() + assert "HTTP 500" in str(e.value) diff --git a/tests/test_explain.py b/tests/test_explain.py new file mode 100644 index 0000000..69f4a88 --- /dev/null +++ b/tests/test_explain.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import enroll.explain as ex + + +def _write_state(bundle: Path, state: dict) -> Path: + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + return bundle / "state.json" + + +def test_explain_state_text_renders_roles_inventory_and_reasons(tmp_path: Path): + bundle = tmp_path / "bundle" + state = { + "schema_version": 3, + "host": {"hostname": "h1", "os": "debian", "pkg_backend": "dpkg"}, + "enroll": {"version": "0.0.0"}, + "inventory": { + "packages": { + "foo": { + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [ + {"kind": "systemd_unit", "ref": "foo.service"}, + {"kind": "package_role", "ref": "foo"}, + ], + "roles": ["foo"], + }, + "bar": { + "installations": [{"version": "2.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed", "ref": "manual"}], + "roles": ["bar"], + }, + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [{"name": "alice"}], + "managed_files": [ + { + "path": "/home/alice/.ssh/authorized_keys", + "src_rel": "home/alice/.ssh/authorized_keys", + "owner": "alice", + "group": "alice", + "mode": "0600", + "reason": "authorized_keys", + } + ], + "managed_dirs": [ + { + "path": "/home/alice/.ssh", + "owner": "alice", + "group": "alice", + "mode": "0700", + "reason": "parent_of_managed_file", + } + ], + "excluded": [{"path": "/etc/shadow", "reason": "sensitive_content"}], + "notes": ["n1", "n2"], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + }, + # Unknown reason should fall back to generic text. + { + "path": "/etc/odd.conf", + "src_rel": "etc/odd.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "mystery_reason", + }, + ], + "excluded": [], + "notes": [], + } + ], + "packages": [ + { + "package": "bar", + "role_name": "bar", + "managed_files": [], + "excluded": [], + "notes": [], + } + ], + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": ["/etc/a", "/etc/b"], + "exclude_patterns": ["/etc/x", "/etc/y"], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, + } + + state_path = _write_state(bundle, state) + + out = ex.explain_state(str(state_path), fmt="text", max_examples=1) + + assert "Enroll explained:" in out + assert "Host: h1" in out + assert "Inventory" in out + # observed_via summary should include both kinds (order not strictly guaranteed) + assert "observed_via" in out + assert "systemd_unit" in out + assert "user_installed" in out + + # extra_paths include/exclude patterns should be rendered with max_examples truncation. + assert "include_patterns:" in out + assert "/etc/a" in out + assert "exclude_patterns:" in out + + # Reasons section should mention known and unknown reasons. + assert "modified_conffile" in out + assert "mystery_reason" in out + assert "Captured with reason 'mystery_reason'" in out + + # Excluded paths section. + assert "Why paths were excluded" in out + assert "sensitive_content" in out + + +def test_explain_state_json_contains_structured_report(tmp_path: Path): + bundle = tmp_path / "bundle" + state = { + "schema_version": 3, + "host": {"hostname": "h2", "os": "rhel", "pkg_backend": "rpm"}, + "enroll": {"version": "1.2.3"}, + "inventory": {"packages": {}}, + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, + } + state_path = _write_state(bundle, state) + + raw = ex.explain_state(str(state_path), fmt="json", max_examples=2) + rep = json.loads(raw) + assert rep["host"]["hostname"] == "h2" + assert rep["enroll"]["version"] == "1.2.3" + assert rep["inventory"]["package_count"] == 0 + assert isinstance(rep["roles"], list) + assert "reasons" in rep diff --git a/tests/test_harvest_cron_logrotate.py b/tests/test_harvest_cron_logrotate.py new file mode 100644 index 0000000..d20d371 --- /dev/null +++ b/tests/test_harvest_cron_logrotate.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import enroll.harvest as h +from enroll.platform import PlatformInfo +from enroll.systemd import UnitInfo + + +class AllowAllPolicy: + def deny_reason(self, path: str): + return None + + +class FakeBackend: + def __init__( + self, + *, + name: str, + installed: dict[str, list[dict[str, str]]], + manual: list[str], + ): + self.name = name + self._installed = dict(installed) + self._manual = list(manual) + + def build_etc_index(self): + # No package ownership information needed for this test. + return set(), {}, {}, {} + + def installed_packages(self): + return dict(self._installed) + + def list_manual_packages(self): + return list(self._manual) + + def owner_of_path(self, path: str): + return None + + def specific_paths_for_hints(self, hints: set[str]): + return [] + + def is_pkg_config_path(self, path: str) -> bool: + return False + + def modified_paths(self, pkg: str, etc_paths: list[str]): + return {} + + +def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles( + monkeypatch, tmp_path: Path +): + bundle = tmp_path / "bundle" + + # Fake files we want harvested. + files = { + "/etc/crontab": b"* * * * * root echo hi\n", + "/etc/cron.d/php": b"# php cron\n", + "/var/spool/cron/crontabs/alice": b"@daily echo user\n", + "/etc/logrotate.conf": b"weekly\n", + "/etc/logrotate.d/rsyslog": b"/var/log/syslog { rotate 7 }\n", + } + + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isfile", lambda p: p in files) + monkeypatch.setattr(h.os.path, "isdir", lambda p: False) + monkeypatch.setattr(h.os.path, "exists", lambda p: (p in files) or False) + + # Expand cron/logrotate globs deterministically. + def fake_iter_matching(spec: str, cap: int = 10000): + mapping = { + "/etc/crontab": ["/etc/crontab"], + "/etc/cron.d/*": ["/etc/cron.d/php"], + "/etc/cron.hourly/*": [], + "/etc/cron.daily/*": [], + "/etc/cron.weekly/*": [], + "/etc/cron.monthly/*": [], + "/etc/cron.allow": [], + "/etc/cron.deny": [], + "/etc/anacrontab": [], + "/etc/anacron/*": [], + "/var/spool/cron/*": [], + "/var/spool/cron/crontabs/*": ["/var/spool/cron/crontabs/alice"], + "/var/spool/crontabs/*": [], + "/var/spool/anacron/*": [], + "/etc/logrotate.conf": ["/etc/logrotate.conf"], + "/etc/logrotate.d/*": ["/etc/logrotate.d/rsyslog"], + } + return list(mapping.get(spec, []))[:cap] + + monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching) + + # Avoid real system probing. + monkeypatch.setattr( + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) + ) + backend = FakeBackend( + name="dpkg", + installed={ + "cron": [{"version": "1", "arch": "amd64"}], + "logrotate": [{"version": "1", "arch": "amd64"}], + }, + # Include cron/logrotate in manual packages to ensure they are skipped in the generic loop. + manual=["cron", "logrotate"], + ) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + + # Include a service that would collide with cron role naming. + monkeypatch.setattr( + h, "list_enabled_services", lambda: ["cron.service", "foo.service"] + ) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) + monkeypatch.setattr( + h, + "get_unit_info", + lambda unit: UnitInfo( + name=unit, + fragment_path=None, + dropin_paths=[], + env_files=[], + exec_paths=[], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ), + ) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) + monkeypatch.setattr( + h, + "stat_triplet", + lambda p: ("alice" if "alice" in p else "root", "root", "0644"), + ) + + # Avoid needing real source files by implementing our own bundle copier. + def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): + dst = Path(bundle_dir) / "artifacts" / role_name / src_rel + dst.parent.mkdir(parents=True, exist_ok=True) + dst.write_bytes(files.get(abs_path, b"")) + + monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) + + state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) + st = json.loads(Path(state_path).read_text(encoding="utf-8")) + + # cron.service must be skipped to avoid colliding with the dedicated "cron" package role. + svc_units = [s["unit"] for s in st["roles"]["services"]] + assert "cron.service" not in svc_units + assert "foo.service" in svc_units + + pkgs = st["roles"]["packages"] + cron = next(p for p in pkgs if p["role_name"] == "cron") + logrotate = next(p for p in pkgs if p["role_name"] == "logrotate") + + cron_paths = {mf["path"] for mf in cron["managed_files"]} + assert "/etc/crontab" in cron_paths + assert "/etc/cron.d/php" in cron_paths + # user crontab captured + assert "/var/spool/cron/crontabs/alice" in cron_paths + + lr_paths = {mf["path"] for mf in logrotate["managed_files"]} + assert "/etc/logrotate.conf" in lr_paths + assert "/etc/logrotate.d/rsyslog" in lr_paths diff --git a/tests/test_harvest_helpers.py b/tests/test_harvest_helpers.py new file mode 100644 index 0000000..531a62c --- /dev/null +++ b/tests/test_harvest_helpers.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import os +from pathlib import Path + +import enroll.harvest as h + + +def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path: Path): + # Layout: + # root/real.txt (file) + # root/sub/nested.txt + # root/link -> ... (ignored) + root = tmp_path / "root" + (root / "sub").mkdir(parents=True) + (root / "real.txt").write_text("a", encoding="utf-8") + (root / "sub" / "nested.txt").write_text("b", encoding="utf-8") + + paths = { + str(root): "dir", + str(root / "real.txt"): "file", + str(root / "sub"): "dir", + str(root / "sub" / "nested.txt"): "file", + str(root / "link"): "link", + } + + monkeypatch.setattr(h.glob, "glob", lambda spec: [str(root), str(root / "link")]) + monkeypatch.setattr(h.os.path, "islink", lambda p: paths.get(p) == "link") + monkeypatch.setattr(h.os.path, "isfile", lambda p: paths.get(p) == "file") + monkeypatch.setattr(h.os.path, "isdir", lambda p: paths.get(p) == "dir") + monkeypatch.setattr( + h.os, + "walk", + lambda p: [ + (str(root), ["sub"], ["real.txt", "link"]), + (str(root / "sub"), [], ["nested.txt"]), + ], + ) + + out = h._iter_matching_files("/whatever/*", cap=100) + assert str(root / "real.txt") in out + assert str(root / "sub" / "nested.txt") in out + assert str(root / "link") not in out + + +def test_parse_apt_signed_by_extracts_keyrings(tmp_path: Path): + f1 = tmp_path / "a.list" + f1.write_text( + "deb [signed-by=/usr/share/keyrings/foo.gpg] https://example.invalid stable main\n", + encoding="utf-8", + ) + f2 = tmp_path / "b.sources" + f2.write_text( + "Types: deb\nSigned-By: /etc/apt/keyrings/bar.gpg, /usr/share/keyrings/baz.gpg\n", + encoding="utf-8", + ) + f3 = tmp_path / "c.sources" + f3.write_text("Signed-By: | /bin/echo nope\n", encoding="utf-8") + + out = h._parse_apt_signed_by([str(f1), str(f2), str(f3)]) + assert "/usr/share/keyrings/foo.gpg" in out + assert "/etc/apt/keyrings/bar.gpg" in out + assert "/usr/share/keyrings/baz.gpg" in out + + +def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch): + # Simulate: + # /etc/apt/apt.conf.d/00test + # /etc/apt/sources.list.d/test.list (signed-by outside /etc/apt) + # /usr/share/keyrings/ext.gpg + files = { + "/etc/apt/apt.conf.d/00test": "file", + "/etc/apt/sources.list.d/test.list": "file", + "/usr/share/keyrings/ext.gpg": "file", + } + + monkeypatch.setattr(h.os.path, "isdir", lambda p: p in {"/etc/apt"}) + monkeypatch.setattr( + h.os, + "walk", + lambda root: [ + ("/etc/apt", ["apt.conf.d", "sources.list.d"], []), + ("/etc/apt/apt.conf.d", [], ["00test"]), + ("/etc/apt/sources.list.d", [], ["test.list"]), + ], + ) + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file") + + # Only treat the sources glob as having a hit. + def fake_iter_matching(spec: str, cap: int = 10000): + if spec == "/etc/apt/sources.list.d/*.list": + return ["/etc/apt/sources.list.d/test.list"] + return [] + + monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching) + + # Provide file contents for the sources file. + real_open = open + + def fake_open(path, *a, **k): + if path == "/etc/apt/sources.list.d/test.list": + return real_open(os.devnull, "r", encoding="utf-8") # placeholder + return real_open(path, *a, **k) + + # Easier: patch _parse_apt_signed_by directly to avoid filesystem reads. + monkeypatch.setattr( + h, "_parse_apt_signed_by", lambda sfs: {"/usr/share/keyrings/ext.gpg"} + ) + + out = h._iter_apt_capture_paths() + paths = {p for p, _r in out} + reasons = {p: r for p, r in out} + assert "/etc/apt/apt.conf.d/00test" in paths + assert "/etc/apt/sources.list.d/test.list" in paths + assert "/usr/share/keyrings/ext.gpg" in paths + assert reasons["/usr/share/keyrings/ext.gpg"] == "apt_signed_by_keyring" + + +def test_iter_dnf_capture_paths(monkeypatch): + files = { + "/etc/dnf/dnf.conf": "file", + "/etc/yum/yum.conf": "file", + "/etc/yum.conf": "file", + "/etc/yum.repos.d/test.repo": "file", + "/etc/pki/rpm-gpg/RPM-GPG-KEY": "file", + } + + def isdir(p): + return p in {"/etc/dnf", "/etc/yum", "/etc/yum.repos.d", "/etc/pki/rpm-gpg"} + + def walk(root): + if root == "/etc/dnf": + return [("/etc/dnf", [], ["dnf.conf"])] + if root == "/etc/yum": + return [("/etc/yum", [], ["yum.conf"])] + if root == "/etc/pki/rpm-gpg": + return [("/etc/pki/rpm-gpg", [], ["RPM-GPG-KEY"])] + return [] + + monkeypatch.setattr(h.os.path, "isdir", isdir) + monkeypatch.setattr(h.os, "walk", walk) + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file") + monkeypatch.setattr( + h, + "_iter_matching_files", + lambda spec, cap=10000: ( + ["/etc/yum.repos.d/test.repo"] if spec.endswith("*.repo") else [] + ), + ) + + out = h._iter_dnf_capture_paths() + paths = {p for p, _r in out} + assert "/etc/dnf/dnf.conf" in paths + assert "/etc/yum/yum.conf" in paths + assert "/etc/yum.conf" in paths + assert "/etc/yum.repos.d/test.repo" in paths + assert "/etc/pki/rpm-gpg/RPM-GPG-KEY" in paths + + +def test_iter_system_capture_paths_dedupes_first_reason(monkeypatch): + monkeypatch.setattr(h, "_SYSTEM_CAPTURE_GLOBS", [("/a", "r1"), ("/b", "r2")]) + monkeypatch.setattr( + h, + "_iter_matching_files", + lambda spec, cap=10000: ["/dup"] if spec in {"/a", "/b"} else [], + ) + out = h._iter_system_capture_paths() + assert out == [("/dup", "r1")] diff --git a/tests/test_manifest.py b/tests/test_manifest.py index fec9cc3..8b34fcb 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -1,7 +1,12 @@ import json from pathlib import Path -from enroll.manifest import manifest +import os +import stat +import tarfile +import pytest + +import enroll.manifest as manifest def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): @@ -176,7 +181,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "bin" / "myscript" ).write_text("#!/bin/sh\necho hi\n", encoding="utf-8") - manifest(str(bundle), str(out)) + manifest.manifest(str(bundle), str(out)) # Service role: systemd management should be gated on foo_manage_unit and a probe. tasks = (out / "roles" / "foo" / "tasks" / "main.yml").read_text(encoding="utf-8") @@ -345,7 +350,7 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) / "myapp.conf" ).write_text("myapp=1\n", encoding="utf-8") - manifest(str(bundle), str(out), fqdn=fqdn) + manifest.manifest(str(bundle), str(out), fqdn=fqdn) # Host playbook exists. assert (out / "playbooks" / f"{fqdn}.yml").exists() @@ -482,7 +487,7 @@ def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): bundle.mkdir(parents=True, exist_ok=True) (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") - manifest(str(bundle), str(out)) + manifest.manifest(str(bundle), str(out)) pb = (out / "playbook.yml").read_text(encoding="utf-8") assert "- dnf_config" in pb @@ -502,3 +507,291 @@ def test_render_install_packages_tasks_contains_dnf_branch(): assert "ansible.builtin.dnf" in txt assert "ansible.builtin.package" in txt assert "pkg_mgr" in txt + + +def test_manifest_orders_cron_and_logrotate_at_playbook_tail(tmp_path: Path): + """Cron/logrotate roles should appear at the end. + + The cron role may restore per-user crontabs under /var/spool, so it should + run after users have been created. + """ + + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + state = { + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": {"packages": {}}, + "roles": { + "users": { + "role_name": "users", + "users": [{"name": "alice"}], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [ + { + "package": "curl", + "role_name": "curl", + "managed_files": [], + "excluded": [], + "notes": [], + }, + { + "package": "cron", + "role_name": "cron", + "managed_files": [ + { + "path": "/var/spool/cron/crontabs/alice", + "src_rel": "var/spool/cron/crontabs/alice", + "owner": "alice", + "group": "root", + "mode": "0600", + "reason": "system_cron", + } + ], + "excluded": [], + "notes": [], + }, + { + "package": "logrotate", + "role_name": "logrotate", + "managed_files": [ + { + "path": "/etc/logrotate.conf", + "src_rel": "etc/logrotate.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "system_logrotate", + } + ], + "excluded": [], + "notes": [], + }, + ], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, + } + + # Minimal artifacts for managed files. + (bundle / "artifacts" / "cron" / "var" / "spool" / "cron" / "crontabs").mkdir( + parents=True, exist_ok=True + ) + ( + bundle / "artifacts" / "cron" / "var" / "spool" / "cron" / "crontabs" / "alice" + ).write_text("@daily echo hi\n", encoding="utf-8") + (bundle / "artifacts" / "logrotate" / "etc").mkdir(parents=True, exist_ok=True) + (bundle / "artifacts" / "logrotate" / "etc" / "logrotate.conf").write_text( + "weekly\n", encoding="utf-8" + ) + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + manifest.manifest(str(bundle), str(out)) + + pb = (out / "playbook.yml").read_text(encoding="utf-8").splitlines() + # Roles are emitted as indented list items under the `roles:` key. + roles = [ + ln.strip().removeprefix("- ").strip() for ln in pb if ln.startswith(" - ") + ] + + # Ensure tail ordering. + assert roles[-2:] == ["cron", "logrotate"] + assert "users" in roles + assert roles.index("users") < roles.index("cron") + + +def test_yaml_helpers_fallback_when_yaml_unavailable(monkeypatch): + monkeypatch.setattr(manifest, "_try_yaml", lambda: None) + assert manifest._yaml_load_mapping("foo: 1\n") == {} + out = manifest._yaml_dump_mapping({"b": 2, "a": 1}) + # Best-effort fallback is key: repr(value) + assert out.splitlines()[0].startswith("a: ") + assert out.endswith("\n") + + +def test_copy2_replace_makes_readonly_sources_user_writable( + monkeypatch, tmp_path: Path +): + src = tmp_path / "src.txt" + dst = tmp_path / "dst.txt" + src.write_text("hello", encoding="utf-8") + # Make source read-only; copy2 preserves mode, so tmp will be read-only too. + os.chmod(src, 0o444) + + manifest._copy2_replace(str(src), str(dst)) + + st = os.stat(dst, follow_symlinks=False) + assert stat.S_IMODE(st.st_mode) & stat.S_IWUSR + + +def test_prepare_bundle_dir_sops_decrypts_and_extracts(monkeypatch, tmp_path: Path): + enc = tmp_path / "harvest.tar.gz.sops" + enc.write_text("ignored", encoding="utf-8") + + def fake_require(): + return None + + def fake_decrypt(src: str, dst: str, *, mode: int = 0o600): + # Create a minimal tar.gz with a state.json file. + with tarfile.open(dst, "w:gz") as tf: + p = tmp_path / "state.json" + p.write_text("{}", encoding="utf-8") + tf.add(p, arcname="state.json") + + monkeypatch.setattr(manifest, "require_sops_cmd", fake_require) + monkeypatch.setattr(manifest, "decrypt_file_binary_to", fake_decrypt) + + bundle_dir, td = manifest._prepare_bundle_dir(str(enc), sops_mode=True) + try: + assert (Path(bundle_dir) / "state.json").exists() + finally: + td.cleanup() + + +def test_prepare_bundle_dir_rejects_non_dir_without_sops(tmp_path: Path): + fp = tmp_path / "bundle.tar.gz" + fp.write_text("x", encoding="utf-8") + with pytest.raises(RuntimeError): + manifest._prepare_bundle_dir(str(fp), sops_mode=False) + + +def test_tar_dir_to_with_progress_writes_progress_when_tty(monkeypatch, tmp_path: Path): + src = tmp_path / "dir" + src.mkdir() + (src / "a.txt").write_text("a", encoding="utf-8") + (src / "b.txt").write_text("b", encoding="utf-8") + + out = tmp_path / "out.tar.gz" + writes: list[bytes] = [] + + monkeypatch.setattr(manifest.os, "isatty", lambda fd: True) + monkeypatch.setattr(manifest.os, "write", lambda fd, b: writes.append(b) or len(b)) + + manifest._tar_dir_to_with_progress(str(src), str(out), desc="tarring") + assert out.exists() + assert writes # progress was written + assert writes[-1].endswith(b"\n") + + +def test_encrypt_manifest_out_dir_to_sops_handles_missing_tmp_cleanup( + monkeypatch, tmp_path: Path +): + src_dir = tmp_path / "manifest" + src_dir.mkdir() + (src_dir / "x.txt").write_text("x", encoding="utf-8") + + out = tmp_path / "manifest.tar.gz.sops" + + monkeypatch.setattr(manifest, "require_sops_cmd", lambda: None) + + def fake_encrypt(in_fp, out_fp, *args, **kwargs): + Path(out_fp).write_text("enc", encoding="utf-8") + + monkeypatch.setattr(manifest, "encrypt_file_binary", fake_encrypt) + # Simulate race where tmp tar is already removed. + monkeypatch.setattr( + manifest.os, "unlink", lambda p: (_ for _ in ()).throw(FileNotFoundError()) + ) + + res = manifest._encrypt_manifest_out_dir_to_sops(str(src_dir), str(out), ["ABC"]) # type: ignore[arg-type] + assert str(res).endswith(".sops") + assert out.exists() + + +def test_manifest_applies_jinjaturtle_to_jinjifyable_managed_file( + monkeypatch, tmp_path: Path +): + # Create a minimal bundle with just an apt_config snapshot. + bundle = tmp_path / "bundle" + (bundle / "artifacts" / "apt_config" / "etc" / "apt").mkdir(parents=True) + (bundle / "artifacts" / "apt_config" / "etc" / "apt" / "foo.ini").write_text( + "key=VALUE\n", encoding="utf-8" + ) + + state = { + "schema_version": 1, + "inventory": {"packages": {}}, + "roles": { + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [ + { + "path": "/etc/apt/foo.ini", + "src_rel": "etc/apt/foo.ini", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "apt_config", + } + ], + "managed_dirs": [], + "excluded": [], + "notes": [], + }, + }, + } + (bundle / "state.json").write_text( + __import__("json").dumps(state), encoding="utf-8" + ) + + monkeypatch.setattr(manifest, "find_jinjaturtle_cmd", lambda: "jinjaturtle") + + class _Res: + template_text = "key={{ foo }}\n" + vars_text = "foo: 123\n" + + monkeypatch.setattr(manifest, "run_jinjaturtle", lambda *a, **k: _Res()) + + out_dir = tmp_path / "out" + manifest.manifest(str(bundle), str(out_dir), jinjaturtle="on") + + tmpl = out_dir / "roles" / "apt_config" / "templates" / "etc" / "apt" / "foo.ini.j2" + assert tmpl.exists() + assert "{{ foo }}" in tmpl.read_text(encoding="utf-8") + + defaults = out_dir / "roles" / "apt_config" / "defaults" / "main.yml" + txt = defaults.read_text(encoding="utf-8") + assert "foo: 123" in txt + # Non-templated file should not exist under files/. + assert not ( + out_dir / "roles" / "apt_config" / "files" / "etc" / "apt" / "foo.ini" + ).exists() diff --git a/tests/test_misc_coverage.py b/tests/test_misc_coverage.py index b4250fc..1ff6e98 100644 --- a/tests/test_misc_coverage.py +++ b/tests/test_misc_coverage.py @@ -1,5 +1,13 @@ +from __future__ import annotations + +import json +import os import stat +import subprocess +import sys +import types from pathlib import Path +from types import SimpleNamespace import pytest @@ -94,3 +102,315 @@ def test_sops_pgp_arg_and_encrypt_decrypt_roundtrip(tmp_path: Path, monkeypatch) # Sanity: we invoked encrypt and decrypt. assert any("--encrypt" in c for c in calls) assert any("--decrypt" in c for c in calls) + + +def test_cache_dir_defaults_to_home_cache(monkeypatch, tmp_path: Path): + # Ensure default path uses ~/.cache when XDG_CACHE_HOME is unset. + from enroll.cache import enroll_cache_dir + + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + p = enroll_cache_dir() + assert str(p).startswith(str(tmp_path)) + assert p.name == "enroll" + + +def test_harvest_cache_state_json_property(tmp_path: Path): + from enroll.cache import HarvestCache + + hc = HarvestCache(tmp_path / "h1") + assert hc.state_json == hc.dir / "state.json" + + +def test_cache_dir_security_rejects_symlink(tmp_path: Path): + from enroll.cache import _ensure_dir_secure + + real = tmp_path / "real" + real.mkdir() + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + with pytest.raises(RuntimeError, match="Refusing to use symlink"): + _ensure_dir_secure(link) + + +def test_cache_dir_chmod_failures_are_ignored(monkeypatch, tmp_path: Path): + from enroll import cache + + # Make the cache base path deterministic and writable. + monkeypatch.setattr(cache, "enroll_cache_dir", lambda: tmp_path) + + # Force os.chmod to fail to cover the "except OSError: pass" paths. + monkeypatch.setattr( + os, "chmod", lambda *a, **k: (_ for _ in ()).throw(OSError("nope")) + ) + + hc = cache.new_harvest_cache_dir() + assert hc.dir.exists() + assert hc.dir.is_dir() + + +def test_stat_triplet_falls_back_to_numeric_ids(monkeypatch, tmp_path: Path): + from enroll.fsutil import stat_triplet + import pwd + import grp + + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + + # Force username/group resolution failures. + monkeypatch.setattr( + pwd, "getpwuid", lambda _uid: (_ for _ in ()).throw(KeyError("no user")) + ) + monkeypatch.setattr( + grp, "getgrgid", lambda _gid: (_ for _ in ()).throw(KeyError("no group")) + ) + + owner, group, mode = stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert len(mode) == 4 + + +def test_ignore_policy_iter_effective_lines_removes_block_comments(): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + data = b"""keep1 +/* +drop me +*/ +keep2 +""" + assert list(pol.iter_effective_lines(data)) == [b"keep1", b"keep2"] + + +def test_ignore_policy_deny_reason_dir_variants(tmp_path: Path): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + + # denied by glob + assert pol.deny_reason_dir("/etc/shadow") == "denied_path" + + # symlink rejected + d = tmp_path / "d" + d.mkdir() + link = tmp_path / "l" + link.symlink_to(d, target_is_directory=True) + assert pol.deny_reason_dir(str(link)) == "symlink" + + # not a directory + f = tmp_path / "f" + f.write_text("x", encoding="utf-8") + assert pol.deny_reason_dir(str(f)) == "not_directory" + + # ok + assert pol.deny_reason_dir(str(d)) is None + + +def test_run_jinjaturtle_parses_outputs(monkeypatch, tmp_path: Path): + # Fully unit-test enroll.jinjaturtle.run_jinjaturtle by stubbing subprocess.run. + from enroll.jinjaturtle import run_jinjaturtle + + def fake_run(cmd, **kwargs): # noqa: ARG001 + # cmd includes "-d -t