From 240e79706f18d0092fa54698c2e16b7c2ddd127b Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Sat, 20 Dec 2025 17:47:00 +1100
Subject: [PATCH 01/69] Allow the user to add extra paths to harvest, or paths
 to ignore, using `--exclude-path` and `--include-path` arguments.

---
 CHANGELOG.md         |   5 +
 README.md            |  26 ++++
 enroll/cli.py        |  78 +++++++++++-
 enroll/diff.py       |   6 +
 enroll/harvest.py    | 110 ++++++++++++++++
 enroll/manifest.py   | 115 +++++++++++++++++
 enroll/pathfilter.py | 293 +++++++++++++++++++++++++++++++++++++++++++
 enroll/remote.py     |  21 +++-
 tests/test_cli.py    |  45 ++++++-
 9 files changed, 687 insertions(+), 12 deletions(-)
 create mode 100644 enroll/pathfilter.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e80a13..2d8d6e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# 0.1.3
+
+ * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path`
+   arguments.
+
 # 0.1.2
 
  * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or
diff --git a/README.md b/README.md
index 6645437..84a6965 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,7 @@ Harvest state about a host and write a harvest bundle.
 - Changed-from-default config (plus related custom/unowned files under service dirs)
 - Non-system users + SSH public keys
 - Misc `/etc` that can’t be attributed to a package (`etc_custom` role)
+- Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time)
 
 **Common flags**
 - Remote harvesting:
@@ -79,6 +80,14 @@ Harvest state about a host and write a harvest bundle.
 - `--dangerous`: disables secret-safety checks (see “Sensitive data” below)
 - Encrypt bundles at rest:
   - `--sops <fingerprint>`: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory
+- Path selection (include/exclude):
+  - `--include-path <pattern>` (repeatable): add extra files/dirs to harvest (even from locations normally ignored, like `/home`). Still subject to secret-safety checks unless `--dangerous`.
+  - `--exclude-path <pattern>` (repeatable): skip files/dirs even if they would normally be harvested.
+  - Pattern syntax:
+    - plain path: matches that file; directories match the directory + everything under it
+    - glob (default): supports `*` and `**` (prefix with `glob:` to force)
+    - regex: prefix with `re:` or `regex:`
+  - Precedence: excludes win over includes.
 
 ---
 
@@ -227,6 +236,23 @@ enroll harvest --out /tmp/enroll-harvest
 enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest
 ```
 
+### Include paths (`--include-path`)
+```bash
+# Add a few dotfiles from /home (still secret-safe unless --dangerous)
+enroll harvest --out /tmp/enroll-harvest --include-path '/home/*/.bashrc' --include-path '/home/*/.profile'
+```
+
+### Exclude paths (`--exclude-path`)
+```bash
+# Skip specific /usr/local/bin entries (or patterns)
+enroll harvest --out /tmp/enroll-harvest --exclude-path '/usr/local/bin/docker-*' --exclude-path '/usr/local/bin/some-tool'
+```
+
+### Regex include
+```bash
+enroll harvest --out /tmp/enroll-harvest --include-path 're:^/home/[^/]+/\.config/myapp/.*$'
+```
+
 ### `--dangerous`
 ```bash
 enroll harvest --out /tmp/enroll-harvest --dangerous
diff --git a/enroll/cli.py b/enroll/cli.py
index 2d8ed5e..f6efe11 100644
--- a/enroll/cli.py
+++ b/enroll/cli.py
@@ -125,6 +125,27 @@ def main() -> None:
         action="store_true",
         help="Collect files more aggressively (may include secrets). 
Disables secret-avoidance checks.", ) + h.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + h.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." + ), + ) + h.add_argument( "--sops", nargs="+", @@ -186,6 +207,27 @@ def main() -> None: action="store_true", help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.", ) + s.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + s.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." + ), + ) + s.add_argument( "--sops", nargs="+", @@ -320,6 +362,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -338,6 +382,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) print(str(state)) else: @@ -350,7 +396,12 @@ def main() -> None: os.chmod(tmp_bundle, 0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -360,7 +411,12 @@ def main() -> None: raise SystemExit( "error: --out is required unless --remote-host is set" ) - path = harvest(args.out, dangerous=bool(args.dangerous)) + path = harvest( + args.out, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) print(path) elif args.cmd == "manifest": out_enc = manifest( @@ -446,6 +502,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -473,6 +531,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) manifest( str(harvest_dir), @@ -493,7 +553,12 @@ def main() -> None: os.chmod(tmp_bundle, 
0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -512,7 +577,12 @@ def main() -> None: raise SystemExit( "error: --harvest is required unless --remote-host is set" ) - harvest(args.harvest, dangerous=bool(args.dangerous)) + harvest( + args.harvest, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) manifest( args.harvest, args.out, diff --git a/enroll/diff.py b/enroll/diff.py index e2861c9..a2b7d91 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -196,6 +196,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf + # extra_paths + xp = state.get("extra_paths") or {} + xp_role = xp.get("role_name") or "extra_paths" + for mf in xp.get("managed_files", []) or []: + yield str(xp_role), mf + def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: """Return mapping of absolute path -> FileRec. diff --git a/enroll/harvest.py b/enroll/harvest.py index 659bebc..48242d6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -19,6 +19,7 @@ from .debian import ( stat_triplet, ) from .ignore import IgnorePolicy +from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users @@ -86,6 +87,16 @@ class UsrLocalCustomSnapshot: notes: List[str] +@dataclass +class ExtraPathsSnapshot: + role_name: str + include_patterns: List[str] + exclude_patterns: List[str] + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + ALLOWED_UNOWNED_EXTS = { ".conf", ".cfg", @@ -250,6 +261,8 @@ def harvest( policy: Optional[IgnorePolicy] = None, *, dangerous: bool = False, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, ) -> str: # If a policy is not supplied, build one. `--dangerous` relaxes secret # detection and deny-glob skipping. @@ -261,6 +274,10 @@ def harvest( policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) + # User-provided includes/excludes. Excludes apply to all harvesting; + # includes are harvested into an extra role. 
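+    # For example (illustrative values, mirroring the README examples):
+    #   harvest("/tmp/bundle",
+    #           include_paths=["/home/*/.bashrc"],
+    #           exclude_paths=["/usr/local/bin/docker-*"])
+    # would pick up matching dotfiles while skipping the docker wrappers.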
+ path_filter = PathFilter(include=include_paths or (), exclude=exclude_paths or ()) + if hasattr(os, "geteuid") and os.geteuid() != 0: print( "Warning: not running as root; harvest may miss files or metadata.", @@ -406,6 +423,9 @@ def harvest( ) for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -522,6 +542,9 @@ def harvest( candidates.setdefault(r, "custom_specific_path") for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -593,6 +616,9 @@ def harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: + if path_filter.is_excluded(sf): + users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) + continue deny = policy.deny_reason(sf) if deny: users_excluded.append(ExcludedFile(path=sf, reason=deny)) @@ -665,6 +691,10 @@ def harvest( if not _is_confish(path): continue + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: etc_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -754,6 +784,10 @@ def harvest( ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) continue + if path_filter.is_excluded(path): + ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: ul_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -806,6 +840,81 @@ def harvest( notes=ul_notes, ) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + extra_notes: List[str] = [] + extra_excluded: List[ExcludedFile] = [] + extra_managed: List[ManagedFile] = [] + extra_role_name = "extra_paths" + + include_specs = list(include_paths or []) + exclude_specs = list(exclude_paths or []) + + if include_specs: + extra_notes.append("User include patterns:") + extra_notes.extend([f"- {p}" for p in include_specs]) + if exclude_specs: + extra_notes.append("User exclude patterns:") + extra_notes.extend([f"- {p}" for p in exclude_specs]) + + included_files: List[str] = [] + if include_specs: + files, inc_notes = expand_includes( + path_filter.iter_include_patterns(), + exclude=path_filter, + max_files=4000, + ) + included_files = files + extra_notes.extend(inc_notes) + + for path in included_files: + if path in already_all: + continue + + if path_filter.is_excluded(path): + extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + extra_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, extra_role_name, path, src_rel) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + extra_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="user_include", + ) + ) + already_all.add(path) + + extra_paths_snapshot = ExtraPathsSnapshot( + 
role_name=extra_role_name, + include_patterns=include_specs, + exclude_patterns=exclude_specs, + managed_files=extra_managed, + excluded=extra_excluded, + notes=extra_notes, + ) + state = { "host": {"hostname": os.uname().nodename, "os": "debian"}, "users": asdict(users_snapshot), @@ -815,6 +924,7 @@ def harvest( "package_roles": [asdict(p) for p in pkg_snaps], "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 6909c5c..2f28eab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -630,6 +630,7 @@ def _manifest_from_bundle_dir( users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -663,6 +664,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] + manifested_extra_paths_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] @@ -1098,6 +1100,118 @@ Unowned /etc config files not attributed to packages or services. manifested_usr_local_custom_roles.append(role) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + role = extra_paths_snapshot.get("role_name", "extra_paths") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = extra_paths_snapshot.get("managed_files", []) + excluded = extra_paths_snapshot.get("excluded", []) + notes = extra_paths_snapshot.get("notes", []) + include_pats = extra_paths_snapshot.get("include_patterns", []) or [] + exclude_pats = extra_paths_snapshot.get("exclude_patterns", []) or [] + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + 
f.write("---\ndependencies: []\n") + + readme = ( + f"""# {role} + +User-requested extra file harvesting. + +## Include patterns +""" + + ("\n".join([f"- {p}" for p in include_pats]) or "- (none)") + + """\n +## Exclude patterns +""" + + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)") + + """\n +## Managed files +""" + + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_extra_paths_roles.append(role) + + manifested_usr_local_custom_roles.append(role) + # ------------------------- # ------------------------- @@ -1412,6 +1526,7 @@ Generated for package `{pkg}`. + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles + + manifested_extra_paths_roles + manifested_users_roles ) diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py new file mode 100644 index 0000000..9df4afa --- /dev/null +++ b/enroll/pathfilter.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +import glob +import os +import re +from dataclasses import dataclass +from pathlib import PurePosixPath +from typing import List, Optional, Sequence, Set, Tuple + + +_REGEX_PREFIXES = ("re:", "regex:") + + +def _has_glob_chars(s: str) -> bool: + return any(ch in s for ch in "*?[") + + +def _norm_abs(p: str) -> str: + """Normalise a path-ish string to an absolute POSIX path. + + We treat inputs that don't start with '/' as being relative to '/'. + """ + + p = p.strip() + if not p: + return "/" + if not p.startswith("/"): + p = "/" + p + # `normpath` keeps a leading '/' for absolute paths. + return os.path.normpath(p) + + +def _posix_match(path: str, pattern: str) -> bool: + """Path matching with glob semantics. + + Uses PurePosixPath.match which: + - treats '/' as a segment separator + - supports '**' for recursive matching + + Both `path` and `pattern` are treated as absolute paths. + """ + + # PurePosixPath.match is anchored and works best on relative strings. + p = path.lstrip("/") + pat = pattern.lstrip("/") + try: + return PurePosixPath(p).match(pat) + except Exception: + # If the pattern is somehow invalid, fail closed. + return False + + +def _regex_literal_prefix(regex: str) -> str: + """Best-effort literal prefix extraction for a regex. + + This lets us pick a starting directory to walk when expanding regex-based + include patterns. + """ + + s = regex + if s.startswith("^"): + s = s[1:] + out: List[str] = [] + escaped = False + meta = set(".^$*+?{}[]\\|()") + for ch in s: + if escaped: + out.append(ch) + escaped = False + continue + if ch == "\\": + escaped = True + continue + if ch in meta: + break + out.append(ch) + return "".join(out) + + +@dataclass(frozen=True) +class CompiledPathPattern: + raw: str + kind: str # 'prefix' | 'glob' | 'regex' + value: str + regex: Optional[re.Pattern[str]] = None + + def matches(self, path: str) -> bool: + p = _norm_abs(path) + + if self.kind == "regex": + if not self.regex: + return False + # Search (not match) so users can write unanchored patterns. 
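+            # e.g. a hypothetical 're:\.config/myapp/' matches any path that
+            # contains that substring; anchor with '^' and '$' to be exact.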
+ return self.regex.search(p) is not None + + if self.kind == "glob": + return _posix_match(p, self.value) + + # prefix + pref = self.value.rstrip("/") + return p == pref or p.startswith(pref + "/") + + +def compile_path_pattern(raw: str) -> CompiledPathPattern: + s = raw.strip() + for pre in _REGEX_PREFIXES: + if s.startswith(pre): + rex = s[len(pre) :].strip() + try: + return CompiledPathPattern( + raw=raw, kind="regex", value=rex, regex=re.compile(rex) + ) + except re.error: + # Treat invalid regexes as non-matching. + return CompiledPathPattern(raw=raw, kind="regex", value=rex, regex=None) + + # If the user explicitly says glob:, honour it. + if s.startswith("glob:"): + pat = s[len("glob:") :].strip() + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(pat)) + + # Heuristic: if it contains glob metacharacters, treat as a glob. + if _has_glob_chars(s) or "**" in s: + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(s)) + + # Otherwise treat as an exact path-or-prefix (dir subtree). + return CompiledPathPattern(raw=raw, kind="prefix", value=_norm_abs(s)) + + +@dataclass +class PathFilter: + """User-provided path filters. + + Semantics: + - exclude patterns always win + - include patterns are used only to expand *additional* files to harvest + (they do not restrict the default harvest set) + + Patterns: + - By default: glob-like (supports '**') + - Regex: prefix with 're:' or 'regex:' + - Force glob: prefix with 'glob:' + - A plain path without wildcards matches that path and everything under it + (directory-prefix behavior). + + Examples: + --exclude-path /usr/local/bin/docker-* + --include-path /home/*/.bashrc + --include-path 're:^/home/[^/]+/.config/myapp/.*$' + """ + + include: Sequence[str] = () + exclude: Sequence[str] = () + + def __post_init__(self) -> None: + self._include = [ + compile_path_pattern(p) for p in self.include if str(p).strip() + ] + self._exclude = [ + compile_path_pattern(p) for p in self.exclude if str(p).strip() + ] + + def is_excluded(self, path: str) -> bool: + for pat in self._exclude: + if pat.matches(path): + return True + return False + + def iter_include_patterns(self) -> List[CompiledPathPattern]: + return list(self._include) + + +def expand_includes( + patterns: Sequence[CompiledPathPattern], + *, + exclude: Optional[PathFilter] = None, + max_files: int = 4000, +) -> Tuple[List[str], List[str]]: + """Expand include patterns into concrete file paths. + + Returns (paths, notes). The returned paths are absolute paths. + + This function is intentionally conservative: + - symlinks are ignored (both dirs and files) + - the number of collected files is capped + + Regex patterns are expanded by walking a best-effort inferred root. + """ + + out: List[str] = [] + notes: List[str] = [] + seen: Set[str] = set() + + def _maybe_add_file(p: str) -> None: + if len(out) >= max_files: + return + p = _norm_abs(p) + if exclude and exclude.is_excluded(p): + return + if p in seen: + return + if not os.path.isfile(p) or os.path.islink(p): + return + seen.add(p) + out.append(p) + + def _walk_dir(root: str, match: Optional[CompiledPathPattern] = None) -> None: + root = _norm_abs(root) + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + # Prune excluded directories early. 
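+            # (e.g. a hypothetical exclude of '/home/*/.cache' stops the walk
+            # from ever descending into those subtrees.)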
+ if exclude: + dirnames[:] = [ + d + for d in dirnames + if not exclude.is_excluded(os.path.join(dirpath, d)) + and not os.path.islink(os.path.join(dirpath, d)) + ] + for fn in filenames: + if len(out) >= max_files: + return + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + if exclude and exclude.is_excluded(p): + continue + if match is not None and not match.matches(p): + continue + if p in seen: + continue + seen.add(p) + out.append(_norm_abs(p)) + + for pat in patterns: + if len(out) >= max_files: + notes.append( + f"Include cap reached ({max_files}); some includes were not expanded." + ) + break + + matched_any = False + + if pat.kind == "prefix": + p = pat.value + if os.path.isfile(p) and not os.path.islink(p): + _maybe_add_file(p) + matched_any = True + elif os.path.isdir(p) and not os.path.islink(p): + before = len(out) + _walk_dir(p) + matched_any = len(out) > before + else: + # Still allow prefix patterns that don't exist now (e.g. remote different) + # by matching nothing rather than erroring. + matched_any = False + + elif pat.kind == "glob": + # Use glob for expansion; also walk directories that match. + gpat = pat.value + hits = glob.glob(gpat, recursive=True) + for h in hits: + if len(out) >= max_files: + break + h = _norm_abs(h) + if exclude and exclude.is_excluded(h): + continue + if os.path.isdir(h) and not os.path.islink(h): + before = len(out) + _walk_dir(h) + if len(out) > before: + matched_any = True + elif os.path.isfile(h) and not os.path.islink(h): + _maybe_add_file(h) + matched_any = True + + else: # regex + rex = pat.value + prefix = _regex_literal_prefix(rex) + # Determine a walk root. If we can infer an absolute prefix, use its + # directory; otherwise fall back to '/'. + if prefix.startswith("/"): + root = os.path.dirname(prefix) or "/" + else: + root = "/" + before = len(out) + _walk_dir(root, match=pat) + matched_any = len(out) > before + + if not matched_any: + notes.append(f"Include pattern matched no files: {pat.raw!r}") + + return out, notes diff --git a/enroll/remote.py b/enroll/remote.py index 469248d..9618512 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import shlex import shutil import tarfile import tempfile @@ -97,6 +98,8 @@ def remote_harvest( remote_python: str = "python3", dangerous: bool = False, no_sudo: bool = False, + include_paths: Optional[list[str]] = None, + exclude_paths: Optional[list[str]] = None, ) -> Path: """Run enroll harvest on a remote host via SSH and pull the bundle locally. @@ -165,13 +168,25 @@ def remote_harvest( sftp.put(str(pyz), rapp) # Run remote harvest. 
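+    # argv is assembled as a list and each element is shell-quoted below
+    # (shlex.quote), so glob patterns like '/home/*/.bashrc' reach the
+    # remote enroll process unexpanded by the remote shell.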
- _cmd = f"{remote_python} {rapp} harvest --out {rbundle}" + argv: list[str] = [ + remote_python, + rapp, + "harvest", + "--out", + rbundle, + ] + if dangerous: + argv.append("--dangerous") + for p in include_paths or []: + argv.extend(["--include-path", str(p)]) + for p in exclude_paths or []: + argv.extend(["--exclude-path", str(p)]) + + _cmd = " ".join(shlex.quote(a) for a in argv) if not no_sudo: cmd = f"sudo {_cmd}" else: cmd = _cmd - if dangerous: - cmd += " --dangerous" rc, out, err = _ssh_run(ssh, cmd) if rc != 0: raise RuntimeError( diff --git a/tests/test_cli.py b/tests/test_cli.py index ca3bfa6..4477b24 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,9 +6,17 @@ import enroll.cli as cli def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -17,6 +25,8 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): cli.main() assert called["out"] == str(tmp_path) assert called["dangerous"] is False + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] captured = capsys.readouterr() assert str(tmp_path / "state.json") in captured.out @@ -55,8 +65,16 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path): calls = [] - def fake_harvest(bundle_dir: str, dangerous: bool = False): - calls.append(("harvest", bundle_dir, dangerous)) + def fake_harvest( + bundle_dir: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): + calls.append( + ("harvest", bundle_dir, dangerous, include_paths or [], exclude_paths or []) + ) return str(tmp_path / "bundle" / "state.json") def fake_manifest(bundle_dir: str, out_dir: str, **kwargs): @@ -87,7 +105,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) cli.main() assert calls == [ - ("harvest", str(tmp_path / "bundle"), False), + ("harvest", str(tmp_path / "bundle"), False, [], []), ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"), ] @@ -95,9 +113,17 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -107,6 +133,8 @@ def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): cli.main() assert called["dangerous"] is True + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( @@ -131,6 +159,9 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( remote_user, dangerous, no_sudo, + include_paths=None, + 
exclude_paths=None,
+        **_kwargs,
     ):
         called.update(
             {
                 "bundle_dir": bundle_dir,
                 "remote_host": remote_host,
                 "remote_port": remote_port,
                 "remote_user": remote_user,
                 "dangerous": dangerous,
                 "no_sudo": no_sudo,
+                "include_paths": include_paths or [],
+                "exclude_paths": exclude_paths or [],
             }
         )
         return cache_dir / "state.json"
@@ -169,6 +202,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
     assert called["remote_user"] == "alice"
     assert called["dangerous"] is False
     assert called["no_sudo"] is False
+    assert called["include_paths"] == []
+    assert called["exclude_paths"] == []
 
 
 def test_cli_single_shot_remote_without_harvest_prints_state_path(

From 9641637d4d27df0c1d524a20c63adae90ff424fa Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Sat, 20 Dec 2025 18:24:46 +1100
Subject: [PATCH 02/69] Add support for an enroll.ini config file to store
 arguments per subcommand, to avoid having to remember them all for
 repetitive executions.

---
 CHANGELOG.md     |   2 +
 README.md        |  56 ++++++++++
 debian/changelog |   9 ++
 enroll/cli.py    | 264 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 330 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d8d6e4..90478e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
  * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path`
    arguments.
+ * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember
+   them all for repetitive executions.
 
 # 0.1.2
 
diff --git a/README.md b/README.md
index 84a6965..a5d2157 100644
--- a/README.md
+++ b/README.md
@@ -336,3 +336,59 @@ ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml
 ```bash
 ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml
 ```
+
+## Configuration file
+
+As can be seen above, there are a lot of powerful permutations available to all four subcommands.
+
+Sometimes, it can be easier to store them in a config file so you don't have to remember them!
+
+Enroll supports reading an ini-style file of all the arguments for each subcommand.
+
+### Location of the config file
+
+The path to the config file can be specified with `-c` or `--config` on the command-line. Otherwise,
+Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory),
+``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`).
+
+You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed.
+
+### Precedence
+
+Highest wins:
+
+ * Explicit CLI flags
+ * INI config ([cmd], [enroll])
+ * argparse defaults
+
+### Example config file
+
+Here is an example.
+
+Whenever a command-line argument has a hyphen in it, be sure to change it to an underscore in the ini file.
+
+```ini
+[enroll]
+# (future global flags may live here)
+
+[harvest]
+dangerous = false
+include_path =
+    /home/*/.bashrc
+    /home/*/.profile
+exclude_path = /usr/local/bin/docker-*, /usr/local/bin/some-tool
+# remote_host = yourserver.example.com
+# remote_user = you
+# remote_port = 2222
+
+[manifest]
+# you can set defaults here too, e.g.
+no_jinjaturtle = true
+sops = 00AE817C24A10C2540461A9C1D7CDE0234DB458D
+
+[single-shot]
+# if you use single-shot, put its defaults here.
+# It does not inherit those of the sections above, so you
+# may wish to repeat them here.
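+# e.g. (illustrative; any key here mirrors the single-shot CLI flags):
+# dangerous = false
+# exclude_path = /usr/local/bin/docker-*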
+include_path = re:^/home/[^/]+/\.config/myapp/.*$ +``` diff --git a/debian/changelog b/debian/changelog index 0b16cfa..f6ba2f7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +enroll (0.1.3) unstable; urgency=medium + + * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` + arguments. + * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember + them all for repetitive executions. + + -- Miguel Jacq Sat, 20 Dec 2025 18:24:00 +1100 + enroll (0.1.2) unstable; urgency=medium * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or diff --git a/enroll/cli.py b/enroll/cli.py index f6efe11..e5f729d 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -1,7 +1,9 @@ from __future__ import annotations import argparse +import configparser import os +import sys import tarfile import tempfile from pathlib import Path @@ -15,6 +17,232 @@ from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +def _discover_config_path(argv: list[str]) -> Optional[Path]: + """Return the config path to use, if any. + + Precedence: + 1) --no-config disables loading. + 2) --config PATH (or -c PATH) + 3) $ENROLL_CONFIG + 4) ./enroll.ini, ./.enroll.ini + 5) $XDG_CONFIG_HOME/enroll/enroll.ini (or ~/.config/enroll/enroll.ini) + + The config file is optional; if no file is found, returns None. + """ + + # Quick scan for explicit flags without needing to build the full parser. + if "--no-config" in argv: + return None + + def _value_after(flag: str) -> Optional[str]: + try: + i = argv.index(flag) + except ValueError: + return None + if i + 1 >= len(argv): + return None + return argv[i + 1] + + p = _value_after("--config") or _value_after("-c") + if p: + return Path(p).expanduser() + + envp = os.environ.get("ENROLL_CONFIG") + if envp: + return Path(envp).expanduser() + + cwd = Path.cwd() + for name in ("enroll.ini", ".enroll.ini"): + cp = cwd / name + if cp.exists() and cp.is_file(): + return cp + + xdg = os.environ.get("XDG_CONFIG_HOME") + if xdg: + base = Path(xdg).expanduser() + else: + base = Path.home() / ".config" + cp = base / "enroll" / "enroll.ini" + if cp.exists() and cp.is_file(): + return cp + + return None + + +def _parse_bool(s: str) -> Optional[bool]: + v = str(s).strip().lower() + if v in {"1", "true", "yes", "y", "on"}: + return True + if v in {"0", "false", "no", "n", "off"}: + return False + return None + + +def _action_lookup(p: argparse.ArgumentParser) -> dict[str, argparse.Action]: + """Map config keys -> argparse actions for a parser. + + Accepts both dest names and long option names without leading dashes, + normalized with '-' -> '_'. + """ + + m: dict[str, argparse.Action] = {} + for a in p._actions: # noqa: SLF001 (argparse internal) + if not getattr(a, "dest", None): + continue + dest = str(a.dest).strip().lower() + if dest: + m[dest] = a + for opt in getattr(a, "option_strings", []) or []: + k = opt.lstrip("-").strip().lower() + if k: + m[k.replace("-", "_")] = a + m[k] = a + return m + + +def _choose_flag(a: argparse.Action) -> Optional[str]: + # Prefer a long flag if available (e.g. --dangerous over -d) + for s in getattr(a, "option_strings", []) or []: + if s.startswith("--"): + return s + for s in getattr(a, "option_strings", []) or []: + return s + return None + + +def _split_list_value(v: str) -> list[str]: + # Support comma-separated and/or multi-line lists. 
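+    # e.g. "a, b" -> ["a", "b"], while a value spanning several lines yields
+    # one item per non-empty line (newlines take precedence over commas).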
+    raw = str(v)
+    if "\n" in raw:
+        parts = [p.strip() for p in raw.splitlines()]
+        return [p for p in parts if p]
+    if "," in raw:
+        parts = [p.strip() for p in raw.split(",")]
+        return [p for p in parts if p]
+    raw = raw.strip()
+    return [raw] if raw else []
+
+
+def _section_to_argv(
+    p: argparse.ArgumentParser, cfg: configparser.ConfigParser, section: str
+) -> list[str]:
+    """Translate an INI section into argv tokens for this parser."""
+    if not cfg.has_section(section):
+        return []
+
+    lookup = _action_lookup(p)
+    out: list[str] = []
+
+    for k, v in cfg.items(section):
+        key = str(k).strip().lower().replace("-", "_")
+        # Avoid recursion / confusing self-configuration.
+        if key in {"config", "no_config"}:
+            continue
+
+        a = lookup.get(key)
+        if not a:
+            # Unknown keys are ignored (but we try to be helpful).
+            print(
+                f"warning: config [{section}] contains unknown option '{k}' (ignored)",
+                file=sys.stderr,
+            )
+            continue
+
+        flag = _choose_flag(a)
+        if not flag:
+            continue
+
+        # Boolean flags
+        if isinstance(a, argparse._StoreTrueAction):  # noqa: SLF001
+            b = _parse_bool(v)
+            if b is True:
+                out.append(flag)
+            continue
+        if isinstance(a, argparse._StoreFalseAction):  # noqa: SLF001
+            b = _parse_bool(v)
+            if b is False:
+                out.append(flag)
+            continue
+
+        # Repeated options
+        if isinstance(a, argparse._AppendAction):  # noqa: SLF001
+            for item in _split_list_value(v):
+                out.extend([flag, item])
+            continue
+
+        # Count flags (rare, but easy to support)
+        if isinstance(a, argparse._CountAction):  # noqa: SLF001
+            b = _parse_bool(v)
+            if b is True:
+                out.append(flag)
+            else:
+                try:
+                    n = int(str(v).strip())
+                except ValueError:
+                    n = 0
+                out.extend([flag] * max(0, n))
+            continue
+
+        # Standard scalar options
+        sval = str(v).strip()
+        if sval:
+            out.extend([flag, sval])
+
+    return out
+
+
+def _inject_config_argv(
+    argv: list[str],
+    *,
+    cfg_path: Optional[Path],
+    root_parser: argparse.ArgumentParser,
+    subparsers: dict[str, argparse.ArgumentParser],
+) -> list[str]:
+    """Return argv with config-derived tokens inserted.
+
+    We insert:
+      - [enroll] options before the subcommand
+      - [<cmd>] options immediately after the subcommand token
+
+    CLI flags always win because they come later in argv.
+    """
+
+    if not cfg_path:
+        return argv
+    cfg_path = Path(cfg_path).expanduser()
+    if not (cfg_path.exists() and cfg_path.is_file()):
+        return argv
+
+    cfg = configparser.ConfigParser()
+    try:
+        cfg.read(cfg_path, encoding="utf-8")
+    except (OSError, configparser.Error) as e:
+        raise SystemExit(f"error: failed to read config file {cfg_path}: {e}")
+
+    global_tokens = _section_to_argv(root_parser, cfg, "enroll")
+
+    # Find the subcommand token position.
+    cmd_pos: Optional[int] = None
+    cmd_name: Optional[str] = None
+    for i, tok in enumerate(argv):
+        if tok in subparsers:
+            cmd_pos = i
+            cmd_name = tok
+            break
+    if cmd_pos is None or cmd_name is None:
+        # No subcommand found (argparse will handle the error); only apply global.
+        return global_tokens + argv
+
+    cmd_tokens = _section_to_argv(subparsers[cmd_name], cfg, cmd_name)
+    # Also accept section names with '_' in place of '-' (e.g. [single_shot])
+    if "-" in cmd_name:
+        alt = cmd_name.replace("-", "_")
+        if alt != cmd_name:
+            cmd_tokens += _section_to_argv(subparsers[cmd_name], cfg, alt)
+
+    return global_tokens + argv[: cmd_pos + 1] + cmd_tokens + argv[cmd_pos + 1 :]
+
+
 def _resolve_sops_out_file(out: Optional[str], *, hint: str) -> Path:
     """Resolve an output *file* path for --sops mode.
@@ -95,6 +323,22 @@
+def _add_config_args(p: argparse.ArgumentParser) -> None:
+    p.add_argument(
+        "-c",
+        "--config",
+        help=(
+            "Path to an INI config file for default options. If omitted, enroll will look for "
+            "./enroll.ini, ./.enroll.ini, or ~/.config/enroll/enroll.ini (or $XDG_CONFIG_HOME/enroll/enroll.ini)."
+        ),
+    )
+    p.add_argument(
+        "--no-config",
+        action="store_true",
+        help="Do not load any INI config file (even if one would be auto-discovered).",
+    )
+
+
 def _add_remote_args(p: argparse.ArgumentParser) -> None:
     p.add_argument(
         "--remote-host",
         help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).",
     )
     p.add_argument(
         "--remote-port",
         type=int,
@@ -110,9 +354,11 @@ def main() -> None:
 
 def main() -> None:
     ap = argparse.ArgumentParser(prog="enroll")
+    _add_config_args(ap)
     sub = ap.add_subparsers(dest="cmd", required=True)
 
     h = sub.add_parser("harvest", help="Harvest service/package/config state")
+    _add_config_args(h)
     h.add_argument(
         "--out",
         help=(
@@ -163,6 +409,7 @@ def main() -> None:
     _add_remote_args(h)
 
     m = sub.add_parser("manifest", help="Render Ansible roles from a harvest")
+    _add_config_args(m)
     m.add_argument(
         "--harvest",
         required=True,
@@ -195,6 +442,7 @@ def main() -> None:
     s = sub.add_parser(
         "single-shot", help="Harvest state, then manifest Ansible code, in one shot"
     )
+    _add_config_args(s)
     s.add_argument(
         "--harvest",
         help=(
@@ -255,6 +503,7 @@ def main() -> None:
     _add_remote_args(s)
 
     d = sub.add_parser("diff", help="Compare two harvests and report differences")
+    _add_config_args(d)
     d.add_argument(
         "--old",
         required=True,
@@ -338,7 +587,20 @@ def main() -> None:
         help="Environment variable containing SMTP password (optional).",
     )
 
-    args = ap.parse_args()
+    argv = sys.argv[1:]
+    cfg_path = _discover_config_path(argv)
+    argv = _inject_config_argv(
+        argv,
+        cfg_path=cfg_path,
+        root_parser=ap,
+        subparsers={
+            "harvest": h,
+            "manifest": m,
+            "single-shot": s,
+            "diff": d,
+        },
+    )
+    args = ap.parse_args(argv)
 
     remote_host: Optional[str] = getattr(args, "remote_host", None)
 

From cf819f755a8ce200a2c6079f70a4ef14cc9efe06 Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Sat, 20 Dec 2025 18:26:04 +1100
Subject: [PATCH 03/69] 0.1.3

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b5a07ab..541eded 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "enroll"
-version = "0.1.2"
+version = "0.1.3"
 description = "Enroll a server's running state retrospectively into Ansible"
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"

From 59239eb2d27d799628e1d1e890325ed2947e6b91 Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Sat, 20 Dec 2025 18:38:05 +1100
Subject: [PATCH 04/69] Fix formatting in README

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a5d2157..00f9d98 100644
--- a/README.md
+++ b/README.md
@@ -349,7 +349,7 @@
 The path to the config file can be specified with `-c` or `--config` on the command-line. Otherwise,
 Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory),
-``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`).
+`~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`).
 
 You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed.
From 51196a0a2b1615b7c463aebf59d861080a2b0ff5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 22 Dec 2025 17:28:10 +1100 Subject: [PATCH 05/69] Fix trivy exit code --- .forgejo/workflows/trivy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/trivy.yml b/.forgejo/workflows/trivy.yml index fad2f6f..d5585f4 100644 --- a/.forgejo/workflows/trivy.yml +++ b/.forgejo/workflows/trivy.yml @@ -23,7 +23,7 @@ jobs: - name: Run trivy run: | - trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry . + trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry --skip-version-check --exit-code 1 . # Notify if any previous step in this job failed - name: Notify on failure From 8c478249d9b2f112ffbdc66e25274160bb7b37e9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 23 Dec 2025 17:22:50 +1100 Subject: [PATCH 06/69] Add build-deb action workflow --- .forgejo/workflows/build-deb.yml | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .forgejo/workflows/build-deb.yml diff --git a/.forgejo/workflows/build-deb.yml b/.forgejo/workflows/build-deb.yml new file mode 100644 index 0000000..28276df --- /dev/null +++ b/.forgejo/workflows/build-deb.yml @@ -0,0 +1,65 @@ +name: CI + +on: + push: + +jobs: + test: + runs-on: docker + + steps: + - name: Install system dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + devscripts \ + debhelper \ + dh-python \ + pybuild-plugin-pyproject \ + python3-all \ + python3-poetry-core \ + python3-yaml \ + python3-paramiko \ + rsync \ + ca-certificates + + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build deb + run: | + mkdir /out + + rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + ./ /out/ + + cd /out/ + export DEBEMAIL="mig@mig5.net" + export DEBFULLNAME="Miguel Jacq" + + dch --distribution "trixie" --local "~trixie" "CI build for trixie" + dpkg-buildpackage -us -uc -b + + # Notify if any previous step in this job failed + - name: Notify on failure + if: ${{ failure() }} + env: + WEBHOOK_URL: ${{ secrets.NODERED_WEBHOOK_URL }} + REPOSITORY: ${{ forgejo.repository }} + RUN_NUMBER: ${{ forgejo.run_number }} + SERVER_URL: ${{ forgejo.server_url }} + run: | + curl -X POST \ + -H "Content-Type: application/json" \ + -d "{\"repository\":\"$REPOSITORY\",\"run_number\":\"$RUN_NUMBER\",\"status\":\"failure\",\"url\":\"$SERVER_URL/$REPOSITORY/actions/runs/$RUN_NUMBER\"}" \ + "$WEBHOOK_URL" From 4d2250f974195c3f5dd300aacb2d43e7aa6f2d65 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 16:56:30 +1100 Subject: [PATCH 07/69] Add fedora rpm building --- Dockerfile.rpmbuild | 102 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +- poetry.lock | 2 +- pyproject.toml | 4 +- release.sh | 31 ++++++++++++++ rpm/enroll.spec | 47 ++++++++++++++++++++ 6 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 Dockerfile.rpmbuild create mode 100644 rpm/enroll.spec diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild new file mode 100644 index 0000000..c928cea --- /dev/null +++ b/Dockerfile.rpmbuild @@ -0,0 +1,102 @@ +# syntax=docker/dockerfile:1 +FROM fedora:42 + +RUN set -eux; \ + dnf -y update; \ + dnf -y install \ + rpm-build \ + rpmdevtools \ + redhat-rpm-config \ + gcc \ 
+ make \ + findutils \ + tar \ + gzip \ + rsync \ + python3 \ + python3-devel \ + python3-setuptools \ + python3-wheel \ + pyproject-rpm-macros \ + python3-rpm-macros \ + python3-yaml \ + python3-paramiko \ + openssl-devel \ + python3-poetry-core ; \ + dnf -y clean all + +# Build runner script (copies repo, tars, runs rpmbuild) +RUN set -eux; cat > /usr/local/bin/build-rpm <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +SRC="${SRC:-/src}" +WORKROOT="${WORKROOT:-/work}" +OUT="${OUT:-/out}" +DEPS_DIR="${DEPS_DIR:-/deps}" + +# Install jinjaturtle from local rpm +# Filter out .src.rpm and debug* subpackages if present. +if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + if [ "${#rpms[@]}" -gt 0 ]; then + echo "Installing dependency RPMs from ${DEPS_DIR}:" + printf ' - %s\n' "${rpms[@]}" + dnf -y install "${rpms[@]}" + dnf -y clean all + else + echo "NOTE: Only src/debug RPMs found in ${DEPS_DIR}; nothing installed." >&2 + fi +else + echo "NOTE: No RPMs found in ${DEPS_DIR}. If the build fails with missing python3dist(jinjaturtle)," >&2 + echo " mount your jinjaturtle RPM directory as -v :/deps" >&2 +fi + +mkdir -p "${WORKROOT}" "${OUT}" +WORK="${WORKROOT}/src" +rm -rf "${WORK}" +mkdir -p "${WORK}" + +rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + "${SRC}/" "${WORK}/" + +cd "${WORK}" + +# Determine version from pyproject.toml unless provided +if [ -n "${VERSION:-}" ]; then + ver="${VERSION}" +else + ver="$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "([^"]+)".*/\1/')" +fi + +TOPDIR="${WORKROOT}/rpmbuild" +mkdir -p "${TOPDIR}"/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} + +tarball="${TOPDIR}/SOURCES/enroll-${ver}.tar.gz" +tar -czf "${tarball}" --transform "s#^#enroll/#" . + +spec_src="rpm/enroll.spec" + +cp -v "${spec_src}" "${TOPDIR}/SPECS/enroll.spec" + +rpmbuild -ba "${TOPDIR}/SPECS/enroll.spec" \ + --define "_topdir ${TOPDIR}" \ + --define "upstream_version ${ver}" + +shopt -s nullglob +cp -v "${TOPDIR}"/RPMS/*/*.rpm "${OUT}/" || true +cp -v "${TOPDIR}"/SRPMS/*.src.rpm "${OUT}/" || true +echo "Artifacts copied to ${OUT}" +EOF + +RUN chmod +x /usr/local/bin/build-rpm + +WORKDIR /work +ENTRYPOINT ["/usr/local/bin/build-rpm"] diff --git a/README.md b/README.md index 00f9d98..5a0db91 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ **enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. -It aims to be **optimistic and noninteractive**: - Detects packages that have been installed. - Detects Debian package ownership of `/etc` files using dpkg’s local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). @@ -26,9 +25,10 @@ It aims to be **optimistic and noninteractive**: 1) **Harvest**: collect host facts + relevant files into a harvest bundle (`state.json` + harvested artifacts) 2) **Manifest**: turn that harvest into Ansible roles/playbooks (and optionally inventory) -Additionally: +Additionally, some other functionalities exist: - **Diff**: compare two harvests and report what changed (packages/services/users/files) since the previous snapshot. +- **Single-shot mode**: run both harvest and manifest at once. 
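+
+A typical single-shot invocation might look like the following (a sketch: `--harvest` and `--out` are the flags defined in `enroll/cli.py`; the paths are illustrative):
+
+```bash
+enroll single-shot --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible
+```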
--- diff --git a/poetry.lock b/poetry.lock index 1f2948d..0a90711 100644 --- a/poetry.lock +++ b/poetry.lock @@ -923,4 +923,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c3466a6595a9822763431a6dff0c7f835407a2591b92d5995592f8e6802c774a" +content-hash = "20623104a1a5f4c6d4aaa759f25b2591d5de345d1464e727eb4140a6ef9a5b6e" diff --git a/pyproject.toml b/pyproject.toml index 541eded..3079404 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,8 @@ repository = "https://git.mig5.net/mig5/enroll" [tool.poetry.dependencies] python = "^3.10" -pyyaml = "^6.0.3" -paramiko = "^4.0.0" +pyyaml = "^6" +paramiko = ">=3.5" [tool.poetry.scripts] enroll = "enroll.cli:main" diff --git a/release.sh b/release.sh index fe99a52..fdbe771 100755 --- a/release.sh +++ b/release.sh @@ -42,3 +42,34 @@ for dist in ${DISTS[@]}; do debfile=$(ls -1 dist/${release}/*.deb) reprepro -b /home/user/git/repo includedeb "${release}" "${debfile}" done + +# RPM +sudo apt-get -y install createrepo-c rpm +docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . +docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 +sudo chown -R "${USER}" "$PWD/dist" + +REPO_ROOT="${HOME}/git/repo_rpm" +RPM_REPO="${REPO_ROOT}/rpm/x86_64" +BUILD_OUTPUT="${HOME}/git/enroll/dist" +REMOTE="letessier.mig5.net:/opt/repo_rpm" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" + +echo "==> Updating RPM repo..." +mkdir -p "$RPM_REPO" + +for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +done + +cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + +createrepo_c "$RPM_REPO" + +echo "==> Signing repomd.xml..." +qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" + +echo "==> Syncing repo to server..." +rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" + +echo "Done!" diff --git a/rpm/enroll.spec b/rpm/enroll.spec new file mode 100644 index 0000000..403d6da --- /dev/null +++ b/rpm/enroll.spec @@ -0,0 +1,47 @@ +%global upstream_version 0.1.3 + +Name: enroll +Version: %{upstream_version} +Release: 1%{?dist}.enroll1 +Summary: Enroll a server's running state retrospectively into Ansible. + +License: GPL-3.0-or-later +URL: https://git.mig5.net/mig5/enroll +Source0: %{name}-%{version}.tar.gz + +BuildArch: noarch + +BuildRequires: pyproject-rpm-macros +BuildRequires: python3-devel +BuildRequires: python3-poetry-core + +Requires: python3-yaml +Requires: python3-paramiko + +# Make sure private repo dependency is pulled in by package name as well. +Recommends: jinjaturtle + +%description +Enroll a server's running state retrospectively into Ansible. 
+ +%prep +%autosetup -n enroll + +%generate_buildrequires +%pyproject_buildrequires + +%build +%pyproject_wheel + +%install +%pyproject_install +%pyproject_save_files enroll + +%files -f %{pyproject_files} +%license LICENSE +%doc README.md CHANGELOG.md +%{_bindir}/enroll + +%changelog +* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Initial RPM packaging for Fedora 42 From 054a6192d170dcd1bf418263376287f711ff6dd6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:02:22 +1100 Subject: [PATCH 08/69] Capture more singletons in /etc and avoid apt duplication --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/harvest.py | 367 +++++++++++++++++++++++++++++++++++++++++++--- enroll/ignore.py | 22 +++ enroll/systemd.py | 97 ++++++++++++ rpm/enroll.spec | 5 +- 6 files changed, 481 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90478e5..a51be14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.4 + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. + # 0.1.3 * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/debian/changelog b/debian/changelog index f6ba2f7..17b8985 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.4) unstable; urgency=medium + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. + + -- Miguel Jacq Sat, 27 Dec 2025 19:00:00 +1100 + enroll (0.1.3) unstable; urgency=medium * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/enroll/harvest.py b/enroll/harvest.py index 48242d6..0543355 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -8,7 +8,13 @@ import shutil from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set -from .systemd import list_enabled_services, get_unit_info, UnitQueryError +from .systemd import ( + list_enabled_services, + list_enabled_timers, + get_unit_info, + get_timer_info, + UnitQueryError, +) from .debian import ( build_dpkg_etc_index, dpkg_owner, @@ -98,24 +104,24 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cnf", ".conf", ".cfg", ".ini", - ".cnf", - ".yaml", - ".yml", ".json", - ".toml", + ".link", + ".mount", + ".netdev", + ".network", + ".path", ".rules", ".service", ".socket", - ".timer", ".target", - ".path", - ".mount", - ".network", - ".netdev", - ".link", + ".timer", + ".toml", + ".yaml", + ".yml", "", # allow extensionless (common in /etc/default and /etc/init.d) } @@ -123,23 +129,24 @@ MAX_UNOWNED_FILES_PER_ROLE = 400 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. 
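+# Unowned files in these trees fall through to the shared `etc_custom` role
+# instead of being attached to a per-package role.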
SHARED_ETC_TOPDIRS = { - "default", "apparmor.d", - "network", - "init.d", - "systemd", - "pam.d", - "ssh", - "ssl", - "sudoers.d", + "apt", "cron.d", "cron.daily", "cron.weekly", "cron.monthly", "cron.hourly", + "default", + "init.d", "logrotate.d", - "sysctl.d", "modprobe.d", + "network", + "pam.d", + "ssh", + "ssl", + "sudoers.d", + "sysctl.d", + "systemd", } @@ -256,6 +263,181 @@ def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Se return topdirs +# ------------------------- +# System capture helpers +# ------------------------- + +_APT_SOURCE_GLOBS = [ + "/etc/apt/sources.list", + "/etc/apt/sources.list.d/*.list", + "/etc/apt/sources.list.d/*.sources", +] + +_APT_MISC_GLOBS = [ + "/etc/apt/apt.conf", + "/etc/apt/apt.conf.d/*", + "/etc/apt/preferences", + "/etc/apt/preferences.d/*", + "/etc/apt/auth.conf", + "/etc/apt/auth.conf.d/*", + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", +] + +_SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ + # mounts + ("/etc/fstab", "system_mounts"), + ("/etc/crypttab", "system_mounts"), + # logrotate + ("/etc/logrotate.conf", "system_logrotate"), + ("/etc/logrotate.d/*", "system_logrotate"), + # sysctl / modules + ("/etc/sysctl.conf", "system_sysctl"), + ("/etc/sysctl.d/*", "system_sysctl"), + ("/etc/modprobe.d/*", "system_modprobe"), + ("/etc/modules", "system_modprobe"), + ("/etc/modules-load.d/*", "system_modprobe"), + # cron + ("/etc/crontab", "system_cron"), + ("/etc/cron.d/*", "system_cron"), + ("/etc/anacrontab", "system_cron"), + ("/etc/anacron/*", "system_cron"), + ("/var/spool/cron/crontabs/*", "system_cron"), + ("/var/spool/crontabs/*", "system_cron"), + # network + ("/etc/netplan/*", "system_network"), + ("/etc/systemd/network/*", "system_network"), + ("/etc/network/interfaces", "system_network"), + ("/etc/network/interfaces.d/*", "system_network"), + ("/etc/resolvconf.conf", "system_network"), + ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + # firewall + ("/etc/nftables.conf", "system_firewall"), + ("/etc/nftables.d/*", "system_firewall"), + ("/etc/iptables/rules.v4", "system_firewall"), + ("/etc/iptables/rules.v6", "system_firewall"), + ("/etc/ufw/*", "system_firewall"), + ("/etc/default/ufw", "system_firewall"), + # other + ("/etc/rc.local", "system_rc"), +] + + +def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: + """Expand a glob spec and also walk directories to collect files.""" + out: List[str] = [] + for p in glob.glob(spec): + if len(out) >= cap: + break + if os.path.islink(p): + continue + if os.path.isfile(p): + out.append(p) + continue + if os.path.isdir(p): + for dirpath, _, filenames in os.walk(p): + for fn in filenames: + if len(out) >= cap: + break + fp = os.path.join(dirpath, fn) + if os.path.islink(fp) or not os.path.isfile(fp): + continue + out.append(fp) + if len(out) >= cap: + break + return out + + +def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: + """Return absolute keyring paths referenced via signed-by / Signed-By.""" + out: Set[str] = set() + + # deb line: deb [signed-by=/usr/share/keyrings/foo.gpg] ... 
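+    #   e.g. (illustrative values only):
+    #   deb [arch=amd64 signed-by=/usr/share/keyrings/example.gpg] https://deb.example.org/debian trixie main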
+ re_signed_by = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE) + # deb822: Signed-By: /usr/share/keyrings/foo.gpg + re_signed_by_hdr = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE) + + for sf in source_files: + try: + with open(sf, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#"): + continue + + m = re_signed_by_hdr.match(line) + if m: + val = m.group(1).strip() + if val.startswith("|"): + continue + toks = re.split(r"[\s,]+", val) + for t in toks: + if t.startswith("/"): + out.add(t) + continue + + # Try bracketed options first (common for .list files) + if "[" in line and "]" in line: + bracket = line.split("[", 1)[1].split("]", 1)[0] + for mm in re_signed_by.finditer(bracket): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + continue + + # Fallback: signed-by= in whole line + for mm in re_signed_by.finditer(line): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + except OSError: + continue + + return out + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state.""" + out: List[tuple[str, str]] = [] + + # APT: capture sources and related config + apt_sources: List[str] = [] + for g in _APT_SOURCE_GLOBS: + apt_sources.extend(_iter_matching_files(g)) + for p in sorted(set(apt_sources)): + out.append((p, "system_apt_sources")) + + # APT: misc config files/dirs + for g in _APT_MISC_GLOBS: + for p in _iter_matching_files(g): + out.append((p, "system_apt_config")) + + # APT: referenced keyrings (may live outside /etc) + signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) + for p in sorted(signed_by): + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append((p, "system_apt_keyring")) + + # Other system config/state globs + for spec, reason in _SYSTEM_CAPTURE_GLOBS: + for p in _iter_matching_files(spec): + out.append((p, reason)) + + # De-dup while preserving first reason + seen: Set[str] = set() + uniq: List[tuple[str, str]] = [] + for p, r in out: + if p in seen: + continue + seen.add(p) + uniq.append((p, r)) + return uniq + + def harvest( bundle_dir: str, policy: Optional[IgnorePolicy] = None, @@ -467,6 +649,107 @@ def harvest( ) ) + # ------------------------- + # Enabled systemd timers + # + # Timers are typically related to a service/package, so we try to attribute + # timer unit overrides to their associated role rather than creating a + # standalone timer role. If we can't attribute a timer, it will fall back + # to etc_custom (if it's a custom /etc unit). + # ------------------------- + timer_extra_by_pkg: Dict[str, List[str]] = {} + try: + enabled_timers = list_enabled_timers() + except Exception: + enabled_timers = [] + + service_snap_by_unit: Dict[str, ServiceSnapshot] = { + s.unit: s for s in service_snaps + } + + for t in enabled_timers: + try: + ti = get_timer_info(t) + except Exception: # nosec + continue + + timer_paths: List[str] = [] + for pth in [ti.fragment_path, *ti.dropin_paths, *ti.env_files]: + if not pth: + continue + if not pth.startswith("/etc/"): + # Prefer capturing only custom/overridden units. 
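+                # Vendor copies under /usr/lib/systemd are restored by
+                # reinstalling the package, so only the /etc copies matter.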
+ continue + if os.path.islink(pth) or not os.path.isfile(pth): + continue + timer_paths.append(pth) + + if not timer_paths: + continue + + # Primary attribution: timer -> trigger service role + snap = None + if ti.trigger_unit: + snap = service_snap_by_unit.get(ti.trigger_unit) + + if snap is not None: + for path in timer_paths: + if path_filter.is_excluded(path): + snap.excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + continue + deny = policy.deny_reason(path) + if deny: + snap.excluded.append(ExcludedFile(path=path, reason=deny)) + continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + snap.managed_files.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="related_timer", + ) + ) + continue + + # Secondary attribution: associate timer overrides with a package role + # (useful when a timer triggers a service that isn't enabled). + pkgs: Set[str] = set() + if ti.fragment_path: + p = dpkg_owner(ti.fragment_path) + if p: + pkgs.add(p) + if ti.trigger_unit and ti.trigger_unit.endswith(".service"): + try: + ui = get_unit_info(ti.trigger_unit) + if ui.fragment_path: + p = dpkg_owner(ui.fragment_path) + if p: + pkgs.add(p) + for exe in ui.exec_paths: + p = dpkg_owner(exe) + if p: + pkgs.add(p) + except Exception: # nosec + pass + + for pkg in pkgs: + timer_extra_by_pkg.setdefault(pkg, []).extend(timer_paths) + # ------------------------- # Manually installed package roles # ------------------------- @@ -490,6 +773,9 @@ def harvest( managed: List[ManagedFile] = [] candidates: Dict[str, str] = {} + for tpath in timer_extra_by_pkg.get(pkg, []): + candidates.setdefault(tpath, "related_timer") + conff = conffiles_by_pkg.get(pkg, {}) md5sums = read_pkg_md5sums(pkg) @@ -677,7 +963,46 @@ def harvest( for mf in users_managed: already.add(mf.path) - # Walk /etc for unowned config-ish files + # Capture essential system config/state (even if package-owned). + for path, reason in _iter_system_capture_paths(): + if path in already: + continue + + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + etc_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + already.add(path) + + # Walk /etc for remaining unowned config-ish files scanned = 0 for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: diff --git a/enroll/ignore.py b/enroll/ignore.py index 93ba423..ab2cb96 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -30,6 +30,21 @@ DEFAULT_DENY_GLOBS = [ "/usr/local/etc/letsencrypt/*", ] + +# Allow a small set of binary config artifacts that are commonly required to +# reproduce system configuration (notably APT keyrings). 
These are still subject +# to size and readability limits, but are exempt from the "binary_like" denial. +DEFAULT_ALLOW_BINARY_GLOBS = [ + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*.gpg", + "/etc/apt/keyrings/*.gpg", + "/etc/apt/keyrings/*.pgp", + "/etc/apt/keyrings/*.asc", + "/usr/share/keyrings/*.gpg", + "/usr/share/keyrings/*.pgp", + "/usr/share/keyrings/*.asc", +] + SENSITIVE_CONTENT_PATTERNS = [ re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |)PRIVATE KEY-----"), re.compile(rb"(?i)\bpassword\s*="), @@ -44,6 +59,7 @@ BLOCK_END = b"*/" @dataclass class IgnorePolicy: deny_globs: Optional[list[str]] = None + allow_binary_globs: Optional[list[str]] = None max_file_bytes: int = 256_000 sample_bytes: int = 64_000 # If True, be much less conservative about collecting potentially @@ -54,6 +70,8 @@ class IgnorePolicy: def __post_init__(self) -> None: if self.deny_globs is None: self.deny_globs = list(DEFAULT_DENY_GLOBS) + if self.allow_binary_globs is None: + self.allow_binary_globs = list(DEFAULT_ALLOW_BINARY_GLOBS) def iter_effective_lines(self, content: bytes): in_block = False @@ -105,6 +123,10 @@ class IgnorePolicy: return "unreadable" if b"\x00" in data: + for g in self.allow_binary_globs or []: + if fnmatch.fnmatch(path, g): + # Binary is acceptable for explicitly-allowed paths. + return None return "binary_like" if not self.dangerous: diff --git a/enroll/systemd.py b/enroll/systemd.py index ae8ce8d..7081001 100644 --- a/enroll/systemd.py +++ b/enroll/systemd.py @@ -33,6 +33,19 @@ def _run(cmd: list[str]) -> str: return p.stdout +@dataclass +class TimerInfo: + name: str + fragment_path: Optional[str] + dropin_paths: List[str] + env_files: List[str] + trigger_unit: Optional[str] + active_state: Optional[str] + sub_state: Optional[str] + unit_file_state: Optional[str] + condition_result: Optional[str] + + def list_enabled_services() -> List[str]: out = _run( [ @@ -58,6 +71,31 @@ def list_enabled_services() -> List[str]: return sorted(set(units)) +def list_enabled_timers() -> List[str]: + out = _run( + [ + "systemctl", + "list-unit-files", + "--type=timer", + "--state=enabled", + "--no-legend", + ] + ) + units: List[str] = [] + for line in out.splitlines(): + parts = line.split() + if not parts: + continue + unit = parts[0].strip() + if not unit.endswith(".timer"): + continue + # Skip template units like "foo@.timer" + if unit.endswith("@.timer"): + continue + units.append(unit) + return sorted(set(units)) + + def get_unit_info(unit: str) -> UnitInfo: p = subprocess.run( [ @@ -117,3 +155,62 @@ def get_unit_info(unit: str) -> UnitInfo: unit_file_state=kv.get("UnitFileState") or None, condition_result=kv.get("ConditionResult") or None, ) + + +def get_timer_info(unit: str) -> TimerInfo: + p = subprocess.run( + [ + "systemctl", + "show", + unit, + "-p", + "FragmentPath", + "-p", + "DropInPaths", + "-p", + "EnvironmentFiles", + "-p", + "Unit", + "-p", + "ActiveState", + "-p", + "SubState", + "-p", + "UnitFileState", + "-p", + "ConditionResult", + ], + text=True, + capture_output=True, + ) # nosec + if p.returncode != 0: + raise RuntimeError(f"systemctl show failed for {unit}: {p.stderr}") + + kv: dict[str, str] = {} + for line in (p.stdout or "").splitlines(): + if "=" in line: + k, v = line.split("=", 1) + kv[k] = v.strip() + + fragment = kv.get("FragmentPath") or None + dropins = [pp for pp in (kv.get("DropInPaths", "") or "").split() if pp] + + env_files: List[str] = [] + for token in (kv.get("EnvironmentFiles", "") or "").split(): + token = token.lstrip("-") + if token: + 
env_files.append(token) + + trigger = kv.get("Unit") or None + + return TimerInfo( + name=unit, + fragment_path=fragment, + dropin_paths=dropins, + env_files=env_files, + trigger_unit=trigger, + active_state=kv.get("ActiveState") or None, + sub_state=kv.get("SubState") or None, + unit_file_state=kv.get("UnitFileState") or None, + condition_result=kv.get("ConditionResult") or None, + ) diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 403d6da..707dc10 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.3 +%global upstream_version 0.1.4 Name: enroll Version: %{upstream_version} @@ -44,4 +44,7 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers +- Avoid duplicate apt data in package-specific roles. +* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Initial RPM packaging for Fedora 42 From 40aad9e798c4631c571608dbeeb1a2319440cdc0 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:04:00 +1100 Subject: [PATCH 09/69] 0.1.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3079404..f1f2420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.3" +version = "0.1.4" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From cae6246177581a0cc79e6aa3704298a164a154e3 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:14:01 +1100 Subject: [PATCH 10/69] Add Fedora install steps to README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 5a0db91..c6b8123 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,25 @@ sudo apt update sudo apt install enroll ``` +### Fedora 42 + +```bash +sudo rpm --import https://mig5.net/static/mig5.asc + +sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' +[mig5] +name=mig5 Repository +baseurl=https://rpm.mig5.net/rpm/$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://mig5.net/static/mig5.asc +EOF + +sudo dnf upgrade --refresh +sudo dnf install enroll +``` + ## AppImage Download it from my Releases page, then: From 303c1b0dd8b47fed40bb275845155a4c9daf4b38 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:30:21 +1100 Subject: [PATCH 11/69] Consolidate logrotate and cron files into their main service/package roles if they exist. Standardise on MAX_FILES_CAP in one place --- enroll/harvest.py | 233 ++++++++++++++++++++++++++++++++++++------- enroll/manifest.py | 30 ++---- enroll/pathfilter.py | 2 +- 3 files changed, 208 insertions(+), 57 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 0543355..d4cfacd 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = { "", # allow extensionless (common in /etc/default and /etc/init.d) } -MAX_UNOWNED_FILES_PER_ROLE = 400 +MAX_FILES_CAP = 4000 + +MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. 
SHARED_ETC_TOPDIRS = { @@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ] -def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: +def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: """Expand a glob spec and also walk directories to collect files.""" out: List[str] = [] for p in glob.glob(spec): @@ -963,43 +965,141 @@ def harvest( for mf in users_managed: already.add(mf.path) + # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. + svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} + pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to.""" + base = os.path.basename(path) + + # Try full filename and stem (before first dot). + candidates: List[str] = [base] + if "." in base: + candidates.append(base.split(".", 1)[0]) + + seen: Set[str] = set() + uniq: List[str] = [] + for c in candidates: + if c and c not in seen: + seen.add(c) + uniq.append(c) + + if path.startswith("/etc/logrotate.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "logrotate_snippet") + return None + + if path.startswith("/etc/cron.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "cron_snippet") + return None + + return None + # Capture essential system config/state (even if package-owned). for path, reason in _iter_system_capture_paths(): if path in already: continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = reason + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + 
ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) + already.add(path) # Walk /etc for remaining unowned config-ish files @@ -1016,45 +1116,106 @@ def harvest( if not _is_confish(path): continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = "custom_unowned" + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="custom_unowned", - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) scanned += 1 - if scanned >= 2000: + if scanned >= 
MAX_FILES_CAP: etc_notes.append( - "Reached file cap (2000) while scanning /etc for unowned files." + f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." ) break - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: break etc_custom_snapshot = EtcCustomSnapshot( @@ -1146,7 +1307,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/etc", require_executable=False, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_etc_custom", ) @@ -1154,7 +1315,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/bin", require_executable=True, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_bin_script", ) @@ -1188,7 +1349,7 @@ def harvest( files, inc_notes = expand_includes( path_filter.iter_include_patterns(), exclude=path_filter, - max_files=4000, + max_files=MAX_FILES_CAP, ) included_files = files extra_notes.extend(inc_notes) diff --git a/enroll/manifest.py b/enroll/manifest.py index 2f28eab..d5ebff7 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -138,7 +138,6 @@ def _copy_artifacts( # If a file was successfully templatised by JinjaTurtle, do NOT # also materialise the raw copy in the destination files dir. - # (This keeps the output minimal and avoids redundant "raw" files.) if exclude_rels and rel in exclude_rels: try: if os.path.isfile(dst): @@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None: def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", - "- name: Apply all roles on host", + "- name: Apply all roles on all hosts", " hosts: all", " become: true", " roles:", @@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: pb_lines = [ "---", - f"- name: Apply enroll roles on {fqdn}", + f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", " become: true", " roles:", @@ -390,9 +389,9 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll (data-driven tasks) + return f"""# Generated by enroll -- name: Deploy systemd unit files (templates) +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -406,7 +405,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy systemd unit files (copies) +- name: Deploy any systemd unit files (raw files) vars: _enroll_ff: files: @@ -433,7 +432,7 @@ def _render_generic_files_tasks( | list | length) > 0 -- name: Deploy other managed files (templates) +- name: Deploy any other managed files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -447,7 +446,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy other managed files (copies) +- name: Deploy any other managed files (raw files) vars: _enroll_ff: files: @@ -668,11 +667,6 @@ def _manifest_from_bundle_dir( manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] - # In site_mode, raw harvested files are stored under host-specific inventory - # to avoid cross-host clobber while still sharing a role definition. 
- - # ------------------------- - # ------------------------- # Users role (non-system users) # ------------------------- @@ -793,7 +787,7 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Ensure groups exist ansible.builtin.group: @@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) - # ------------------------- - # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1212,8 +1204,6 @@ User-requested extra file harvesting. manifested_usr_local_custom_roles.append(role) - # ------------------------- - # ------------------------- # Service roles # ------------------------- @@ -1315,7 +1305,7 @@ User-requested extra file harvesting. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: @@ -1474,7 +1464,7 @@ Generated from `{unit}`. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 9df4afa..6541ca9 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -174,7 +174,7 @@ def expand_includes( patterns: Sequence[CompiledPathPattern], *, exclude: Optional[PathFilter] = None, - max_files: int = 4000, + max_files: int, ) -> Tuple[List[str], List[str]]: """Expand include patterns into concrete file paths. From 8c6b51be3eb2ea949861937eddcffed74a439873 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:39:14 +1100 Subject: [PATCH 12/69] Manage apt stuff in its own role, not in etc_custom --- enroll/diff.py | 6 ++ enroll/harvest.py | 120 ++++++++++++++++++++++++++++++---- enroll/manifest.py | 157 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 13 deletions(-) diff --git a/enroll/diff.py b/enroll/diff.py index a2b7d91..0110d17 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -184,6 +184,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in u.get("managed_files", []) or []: yield str(u_role), mf + # apt_config + ac = state.get("apt_config") or {} + ac_role = ac.get("role_name") or "apt_config" + for mf in ac.get("managed_files", []) or []: + yield str(ac_role), mf + # etc_custom ec = state.get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" diff --git a/enroll/harvest.py b/enroll/harvest.py index d4cfacd..c1a1986 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -77,6 +77,14 @@ class UsersSnapshot: notes: List[str] +@dataclass +class AptConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -126,7 +134,6 @@ ALLOWED_UNOWNED_EXTS = { } MAX_FILES_CAP = 4000 - MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. 
@@ -401,30 +408,61 @@ def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: return out -def _iter_system_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for essential system config/state.""" - out: List[tuple[str, str]] = [] +def _iter_apt_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for APT configuration. - # APT: capture sources and related config + This captures the full /etc/apt tree (subject to IgnorePolicy at copy time), + plus any keyrings referenced via signed-by/Signed-By which may live outside + /etc (e.g. /usr/share/keyrings). + """ + reasons: Dict[str, str] = {} + + # Capture all regular files under /etc/apt (no symlinks). + if os.path.isdir("/etc/apt"): + for dirpath, _, filenames in os.walk("/etc/apt"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "apt_config") + + # Identify source files explicitly for nicer reasons and keyring discovery. apt_sources: List[str] = [] for g in _APT_SOURCE_GLOBS: apt_sources.extend(_iter_matching_files(g)) for p in sorted(set(apt_sources)): - out.append((p, "system_apt_sources")) + reasons[p] = "apt_source" - # APT: misc config files/dirs - for g in _APT_MISC_GLOBS: + # Keyrings in standard locations. + for g in ( + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", + ): for p in _iter_matching_files(g): - out.append((p, "system_apt_config")) + reasons[p] = "apt_keyring" - # APT: referenced keyrings (may live outside /etc) + # Keyrings referenced by sources (may live outside /etc/apt). signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) for p in sorted(signed_by): if os.path.islink(p) or not os.path.isfile(p): continue - out.append((p, "system_apt_keyring")) + if p.startswith("/etc/apt/"): + reasons[p] = "apt_keyring" + else: + reasons[p] = "apt_signed_by_keyring" + + # De-dup with stable ordering. + uniq: List[tuple[str, str]] = [] + for p in sorted(reasons.keys()): + uniq.append((p, reasons[p])) + return uniq + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state (non-APT).""" + out: List[tuple[str, str]] = [] - # Other system config/state globs for spec, reason in _SYSTEM_CAPTURE_GLOBS: for p in _iter_matching_files(spec): out.append((p, reason)) @@ -544,6 +582,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: # Only capture conffiles when they differ from the package default. 
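                 # (an unmodified conffile is reproduced by installing the package itself)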
try: @@ -784,6 +824,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: try: current = file_md5(path) @@ -946,6 +988,55 @@ def harvest( notes=users_notes, ) + # ------------------------- + # apt_config role (APT configuration and keyrings) + # ------------------------- + apt_notes: List[str] = [] + apt_excluded: List[ExcludedFile] = [] + apt_managed: List[ManagedFile] = [] + apt_role_name = "apt_config" + + for path, reason in _iter_apt_capture_paths(): + if path_filter.is_excluded(path): + apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + apt_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + apt_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + + apt_config_snapshot = AptConfigSnapshot( + role_name=apt_role_name, + managed_files=apt_managed, + excluded=apt_excluded, + notes=apt_notes, + ) + # ------------------------- # etc_custom role (unowned /etc files not already attributed elsewhere) # ------------------------- @@ -964,6 +1055,8 @@ def harvest( already.add(mf.path) for mf in users_managed: already.add(mf.path) + for mf in apt_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. 
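     # e.g. /etc/logrotate.d/nginx belongs with an nginx service role, not etc_custom.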
svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1107,6 +1200,8 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) + if path.startswith("/etc/apt/"): + continue if path in already: continue if path in owned_etc: @@ -1408,6 +1503,7 @@ def harvest( "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/manifest.py b/enroll/manifest.py index d5ebff7..dbc2353 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -2,6 +2,7 @@ from __future__ import annotations import json import os +import re import shutil import stat import tarfile @@ -627,6 +628,7 @@ def _manifest_from_bundle_dir( services: List[Dict[str, Any]] = state.get("services", []) package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) + apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -661,6 +663,7 @@ def _manifest_from_bundle_dir( _ensure_ansible_cfg(os.path.join(out_dir, "ansible.cfg")) manifested_users_roles: List[str] = [] + manifested_apt_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] manifested_extra_paths_roles: List[str] = [] @@ -887,6 +890,157 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) + # ------------------------- + # apt_config role (APT sources, pinning, and keyrings) + # ------------------------- + if apt_config_snapshot and apt_config_snapshot.get("managed_files"): + role = apt_config_snapshot.get("role_name", "apt_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = apt_config_snapshot.get("managed_files", []) + excluded = apt_config_snapshot.get("excluded", []) + notes = apt_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts (templates live in the role). 
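+        # Materialising the raw copy as well would duplicate the rendered
+        # template and let the two drift apart.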
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = """---\n""" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + # README: summarise repos and keyrings + source_paths: List[str] = [] + keyring_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p == "/etc/apt/sources.list" or p.startswith("/etc/apt/sources.list.d/"): + source_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as sf: + for line in sf: + line = line.strip() + if not line or line.startswith("#"): + continue + for m in url_re.finditer(line): + repo_hosts.add(m.group(1)) + except OSError: + pass # nosec + + if ( + p.startswith("/etc/apt/trusted.gpg") + or p.startswith("/etc/apt/keyrings/") + or p.startswith("/usr/share/keyrings/") + ): + keyring_paths.append(p) + + source_paths = sorted(set(source_paths)) + keyring_paths = sorted(set(keyring_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# apt_config + +APT configuration harvested from the system (sources, pinning, and keyrings). + +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Source files +""" + + ("\n".join([f"- {p}" for p in source_paths]) or "- (none)") + + """\n +## Keyrings +""" + + ("\n".join([f"- {p}" for p in keyring_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_apt_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1512,7 +1666,8 @@ Generated for package `{pkg}`. 
manifested_pkg_roles.append(role) all_roles = ( - manifested_pkg_roles + manifested_apt_config_roles + + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles From 3fc5aec5fc53090ebfd0e315d9bfdd1442320e98 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:56:52 +1100 Subject: [PATCH 13/69] 0.1.5 --- CHANGELOG.md | 6 ++++++ debian/changelog | 8 ++++++++ pyproject.toml | 2 +- rpm/enroll.spec | 6 +++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a51be14..79e45cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 0.1.5 + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + # 0.1.4 * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers diff --git a/debian/changelog b/debian/changelog index 17b8985..5f3be58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +enroll (0.1.5) unstable; urgency=medium + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + + -- Miguel Jacq Sun, 28 Dec 2025 10:00:00 +1100 + enroll (0.1.4) unstable; urgency=medium * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers diff --git a/pyproject.toml b/pyproject.toml index f1f2420..3aa01d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.4" +version = "0.1.5" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 707dc10..ed0a3c9 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.4 +%global upstream_version 0.1.5 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Consolidate logrotate and cron files into their main service/package roles if they exist. +- Standardise on MAX_FILES_CAP in one place +- Manage apt stuff in its own role, not in etc_custom * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers - Avoid duplicate apt data in package-specific roles. 
From 921801caa632c894ac4228efb390061b64fd668b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 15:32:40 +1100 Subject: [PATCH 14/69] 0.1.6 --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/cli.py | 2 +- enroll/debian.py | 4 +- enroll/harvest.py | 718 ++++++++++++++++---------------------- enroll/pathfilter.py | 2 +- pyproject.toml | 2 +- rpm/enroll.spec | 5 +- tests/test___main__.py | 18 + tests/test_accounts.py | 143 ++++++++ tests/test_debian.py | 154 ++++++++ tests/test_diff_bundle.py | 89 +++++ tests/test_pathfilter.py | 80 +++++ tests/test_remote.py | 175 ++++++++++ tests/test_systemd.py | 121 +++++++ 15 files changed, 1102 insertions(+), 423 deletions(-) create mode 100644 tests/test___main__.py create mode 100644 tests/test_accounts.py create mode 100644 tests/test_debian.py create mode 100644 tests/test_diff_bundle.py create mode 100644 tests/test_pathfilter.py create mode 100644 tests/test_remote.py create mode 100644 tests/test_systemd.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 79e45cd..2a4c39d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.6 + + * DRY up some code logic + * More test coverage + # 0.1.5 * Consolidate logrotate and cron files into their main service/package roles if they exist. diff --git a/debian/changelog b/debian/changelog index 5f3be58..a15c38a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.6) unstable; urgency=medium + + * DRY up some code logic + * More test coverage + + -- Miguel Jacq Sun, 28 Dec 2025 15:30:00 +1100 + enroll (0.1.5) unstable; urgency=medium * Consolidate logrotate and cron files into their main service/package roles if they exist. diff --git a/enroll/cli.py b/enroll/cli.py index e5f729d..ae9aba0 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -482,7 +482,7 @@ def main() -> None: metavar="GPG_FINGERPRINT", help=( "Encrypt the harvest as a SOPS-encrypted tarball, and bundle+encrypt the manifest output in --out " - "(same behavior as `harvest --sops` and `manifest --sops`)." + "(same behaviour as `harvest --sops` and `manifest --sops`)." ), ) s.add_argument( diff --git a/enroll/debian.py b/enroll/debian.py index 58569e5..0ddc1f3 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -154,7 +154,9 @@ def parse_status_conffiles( if ":" in line: k, v = line.split(":", 1) key = k - cur[key] = v.lstrip() + # Preserve leading spaces in continuation lines, but strip + # the trailing newline from the initial key line value. + cur[key] = v.lstrip().rstrip("\n") if cur: flush() diff --git a/enroll/harvest.py b/enroll/harvest.py index c1a1986..56e5aed 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -112,9 +112,9 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cfg", ".cnf", ".conf", - ".cfg", ".ini", ".json", ".link", @@ -136,7 +136,9 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 -# Directories that are shared across many packages; never attribute unowned files in these trees to a single package. +# Directories that are shared across many packages. +# Never attribute all unowned files in these trees +# to one single package. 
SHARED_ETC_TOPDIRS = { "apparmor.d", "apt", @@ -195,6 +197,82 @@ def _copy_into_bundle( shutil.copy2(abs_path, dst) +def _capture_file( + *, + bundle_dir: str, + role_name: str, + abs_path: str, + reason: str, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedFile], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]] = None, + seen_global: Optional[Set[str]] = None, + metadata: Optional[tuple[str, str, str]] = None, +) -> bool: + """Try to capture a single file into the bundle. + + Returns True if the file was copied (managed), False otherwise. + + * seen_role: de-dupe within a role (prevents duplicate tasks/records) + * seen_global: de-dupe across roles/stages (prevents multiple roles copying same path) + * metadata: optional (owner, group, mode) tuple to avoid re-statting + """ + + if seen_global is not None and abs_path in seen_global: + return False + if seen_role is not None and abs_path in seen_role: + return False + + def _mark_seen() -> None: + if seen_role is not None: + seen_role.add(abs_path) + if seen_global is not None: + seen_global.add(abs_path) + + if path_filter.is_excluded(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) + _mark_seen() + return False + + deny = policy.deny_reason(abs_path) + if deny: + excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) + _mark_seen() + return False + + try: + owner, group, mode = ( + metadata if metadata is not None else stat_triplet(abs_path) + ) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + src_rel = abs_path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, role_name, abs_path, src_rel) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + managed_out.append( + ManagedFile( + path=abs_path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + _mark_seen() + return True + + def _is_confish(path: str) -> bool: base = os.path.basename(path) _, ext = os.path.splitext(base) @@ -227,7 +305,6 @@ def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: f"/etc/default/{h}", f"/etc/init.d/{h}", f"/etc/sysctl.d/{h}.conf", - f"/etc/logrotate.d/{h}", ] ) return paths @@ -492,7 +569,7 @@ def harvest( policy = IgnorePolicy(dangerous=dangerous) elif dangerous: # If callers explicitly provided a policy but also requested - # dangerous behavior, honour the CLI intent. + # dangerous behaviour, honour the CLI intent. policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) @@ -513,12 +590,21 @@ def harvest( # Service roles # ------------------------- service_snaps: List[ServiceSnapshot] = [] + # Track alias strings (service names, package names, stems) that should map + # back to the service role for shared snippet attribution (cron.d/logrotate.d). + service_role_aliases: Dict[str, Set[str]] = {} + # De-dupe per-role captures (avoids duplicate tasks in manifest generation). + seen_by_role: Dict[str, Set[str]] = {} for unit in list_enabled_services(): role = _role_name_from_unit(unit) try: ui = get_unit_info(unit) except UnitQueryError as e: + # Even when we can't query the unit, keep a minimal alias mapping so + # shared snippets can still be attributed to this role by name. 
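+            # The role name itself is always in the alias set, so a snippet
+            # named after the unit still resolves to this role.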
+ service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) + seen_by_role.setdefault(role, set()) service_snaps.append( ServiceSnapshot( unit=unit, @@ -567,6 +653,10 @@ def harvest( hints = _hint_names(unit, pkgs) _add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) + # Keep a stable set of aliases for this service role. Include current + # packages as well, so that package-named snippets (e.g. cron.d or + # logrotate.d entries) can still be attributed back to this service. + service_role_aliases[role] = set(hints) | set(pkgs) | {role} for sp in _maybe_add_specific_paths(hints): if not os.path.exists(sp): @@ -610,7 +700,7 @@ def harvest( # key material under service directories (e.g. /etc/openvpn/*.crt). # # To avoid exploding output for shared trees (e.g. /etc/systemd), keep - # the older "config-ish only" behavior for known shared topdirs. + # the older "config-ish only" behaviour for known shared topdirs. any_roots: List[str] = [] confish_roots: List[str] = [] for h in hints: @@ -646,34 +736,20 @@ def harvest( "No packages or /etc candidates detected (unexpected for enabled service)." ) + # De-dupe within this role while capturing. This also avoids emitting + # duplicate Ansible tasks for the same destination path. + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) service_snaps.append( @@ -735,36 +811,18 @@ def harvest( snap = service_snap_by_unit.get(ti.trigger_unit) if snap is not None: + role_seen = seen_by_role.setdefault(snap.role_name, set()) for path in timer_paths: - if path_filter.is_excluded(path): - snap.excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - continue - deny = policy.deny_reason(path) - if deny: - snap.excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - snap.managed_files.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="related_timer", - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=snap.role_name, + abs_path=path, + reason="related_timer", + policy=policy, + path_filter=path_filter, + managed_out=snap.managed_files, + excluded_out=snap.excluded, + seen_role=role_seen, ) continue @@ -852,7 +910,6 @@ def harvest( roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) 
roots.extend([f"/etc/default/{td}"]) roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/logrotate.d/{td}"]) roots.extend([f"/etc/sysctl.d/{td}.conf"]) # Capture any custom/unowned files under /etc/ for this @@ -871,34 +928,18 @@ def harvest( if r not in owned_etc and _is_confish(r): candidates.setdefault(r, "custom_specific_path") + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -929,6 +970,7 @@ def harvest( users_notes.append(f"Failed to enumerate users: {e!r}") users_role_name = "users" + users_role_seen = seen_by_role.setdefault(users_role_name, set()) for u in user_records: users_list.append( @@ -946,38 +988,21 @@ def harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: - if path_filter.is_excluded(sf): - users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) - continue - deny = policy.deny_reason(sf) - if deny: - users_excluded.append(ExcludedFile(path=sf, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(sf) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue - src_rel = sf.lstrip("/") - try: - _copy_into_bundle(bundle_dir, users_role_name, sf, src_rel) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue reason = ( "authorized_keys" if sf.endswith("/authorized_keys") else "ssh_public_key" ) - users_managed.append( - ManagedFile( - path=sf, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=users_role_name, + abs_path=sf, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=users_managed, + excluded_out=users_excluded, + seen_role=users_role_seen, ) users_snapshot = UsersSnapshot( @@ -995,39 +1020,19 @@ def harvest( apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] apt_role_name = "apt_config" + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) for path, reason in _iter_apt_capture_paths(): - if path_filter.is_excluded(path): - apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - apt_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) - except OSError: - 
apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - apt_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, ) apt_config_snapshot = AptConfigSnapshot( @@ -1062,11 +1067,58 @@ def harvest( svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} - def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: - """If `path` is a shared snippet, return (role_name, reason) to attach to.""" - base = os.path.basename(path) + # Package name -> role_name for manually-installed package roles. + pkg_name_to_role: Dict[str, str] = {p.package: p.role_name for p in pkg_snaps} - # Try full filename and stem (before first dot). + # Package name -> list of service role names that reference it. + pkg_to_service_roles: Dict[str, List[str]] = {} + for s in service_snaps: + for pkg in s.packages: + pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) + + # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Prefer service roles over package roles when both would match. + alias_ranked: Dict[str, tuple[int, str]] = {} + + def _add_alias(alias: str, role_name: str, *, priority: int) -> None: + key = _safe_name(alias) + if not key: + return + cur = alias_ranked.get(key) + if ( + cur is None + or priority < cur[0] + or (priority == cur[0] and role_name < cur[1]) + ): + alias_ranked[key] = (priority, role_name) + + for role_name, aliases in service_role_aliases.items(): + for a in aliases: + _add_alias(a, role_name, priority=0) + + for p in pkg_snaps: + _add_alias(p.package, p.role_name, priority=1) + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to. + + This is used primarily for /etc/logrotate.d/* and /etc/cron.d/* where + files are "owned" by many packages but people tend to reason about them + per service. + + Resolution order: + 1) dpkg owner -> service role (if any service references the package) + 2) dpkg owner -> package role (manual package role exists) + 3) basename/stem alias match -> preferred role + """ + if path.startswith("/etc/logrotate.d/"): + tag = "logrotate_snippet" + elif path.startswith("/etc/cron.d/"): + tag = "cron_snippet" + else: + return None + + base = os.path.basename(path) candidates: List[str] = [base] if "." in base: candidates.append(base.split(".", 1)[0]) @@ -1078,122 +1130,62 @@ def harvest( seen.add(c) uniq.append(c) - if path.startswith("/etc/logrotate.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "logrotate_snippet") - return None + pkg = dpkg_owner(path) + if pkg: + svc_roles = pkg_to_service_roles.get(pkg) + if svc_roles: + # Deterministic tie-break: lowest role name. 
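+                # (keeps attribution stable across repeated harvests)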
+ return (sorted(set(svc_roles))[0], tag) + pkg_role = pkg_name_to_role.get(pkg) + if pkg_role: + return (pkg_role, tag) - if path.startswith("/etc/cron.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "cron_snippet") - return None + for c in uniq: + key = _safe_name(c) + hit = alias_ranked.get(key) + if hit is not None: + return (hit[1], tag) return None + def _lists_for_role(role_name: str) -> tuple[List[ManagedFile], List[ExcludedFile]]: + if role_name in svc_by_role: + snap = svc_by_role[role_name] + return (snap.managed_files, snap.excluded) + if role_name in pkg_by_role: + snap = pkg_by_role[role_name] + return (snap.managed_files, snap.excluded) + # Fallback (shouldn't normally happen): attribute to etc_custom. + return (etc_managed, etc_excluded) + # Capture essential system config/state (even if package-owned). + etc_role_seen = seen_by_role.setdefault(etc_role_name, set()) for path, reason in _iter_system_capture_paths(): if path in already: continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = reason - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) + role_for_copy, reason_for_role = (etc_role_name, reason) + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen - already.add(path) + _capture_file( + 
bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ) # Walk /etc for remaining unowned config-ish files scanned = 0 @@ -1212,99 +1204,28 @@ def harvest( continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = "custom_unowned" - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) - scanned += 1 + role_for_copy, reason_for_role = (etc_role_name, "custom_unowned") + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen + + if _capture_file( + bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ): + scanned += 1 if scanned >= MAX_FILES_CAP: etc_notes.append( f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." 
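The `_capture_file` helper that all of these hunks now call is introduced elsewhere in this patch, so only its call sites are visible here. Those call sites pin down the contract: apply the user `--exclude-path` filter, then the `IgnorePolicy` deny check, stat the file, copy it into the bundle, append a `ManagedFile` or `ExcludedFile` to the caller's lists, record the path in the per-role (and optional global) seen sets, and return `True` only when the file was actually captured. Below is a minimal sketch consistent with those call sites, reusing the module's existing `ManagedFile`, `ExcludedFile`, `stat_triplet` and `_copy_into_bundle` names; the exact signature, the `metadata` short-circuit and the seen-set bookkeeping are inferred, not copied from the real implementation:

```python
from typing import List, Optional, Set, Tuple


def _capture_file(
    *,
    bundle_dir: str,
    role_name: str,
    abs_path: str,
    reason: str,
    policy: "IgnorePolicy",
    path_filter: "PathFilter",
    managed_out: List["ManagedFile"],
    excluded_out: List["ExcludedFile"],
    seen_role: Set[str],
    seen_global: Optional[Set[str]] = None,
    metadata: Optional[Tuple[str, str, str]] = None,
) -> bool:
    """Capture one file into the bundle for role_name; True iff captured."""

    def mark() -> None:
        # Record the path so it is not re-processed for this role (or at all).
        seen_role.add(abs_path)
        if seen_global is not None:
            seen_global.add(abs_path)

    if abs_path in seen_role:
        return False

    if path_filter.is_excluded(abs_path):
        excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded"))
        mark()
        return False

    deny = policy.deny_reason(abs_path)
    if deny:
        excluded_out.append(ExcludedFile(path=abs_path, reason=deny))
        mark()
        return False

    if metadata is not None:
        owner, group, mode = metadata  # caller already stat'ed the file
    else:
        try:
            owner, group, mode = stat_triplet(abs_path)
        except OSError:
            excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable"))
            mark()
            return False

    src_rel = abs_path.lstrip("/")
    try:
        _copy_into_bundle(bundle_dir, role_name, abs_path, src_rel)
    except OSError:
        excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable"))
        mark()
        return False

    managed_out.append(
        ManagedFile(
            path=abs_path,
            src_rel=src_rel,
            owner=owner,
            group=group,
            mode=mode,
            reason=reason,
        )
    )
    mark()
    return True
```

The boolean return is what lets callers such as the /etc walk and the /usr/local scan keep their `scanned` counters and `already`/`already_all` sets accurate without duplicating the exclude/deny/stat/copy ladder at every site.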
@@ -1339,6 +1260,7 @@ def harvest( scanned = 0 if not os.path.isdir(root): return + role_seen = seen_by_role.setdefault(ul_role_name, set()) for dirpath, _, filenames in os.walk(root): for fn in filenames: path = os.path.join(dirpath, fn) @@ -1346,54 +1268,34 @@ def harvest( continue if not os.path.isfile(path) or os.path.islink(path): continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + if require_executable: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue try: if (int(mode, 8) & 0o111) == 0: continue except ValueError: # If mode parsing fails, be conservative and skip. continue - else: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - if path_filter.is_excluded(path): - ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - ul_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, ul_role_name, path, src_rel) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - ul_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) - ) - - already_all.add(path) - scanned += 1 + if _capture_file( + bundle_dir=bundle_dir, + role_name=ul_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=ul_managed, + excluded_out=ul_excluded, + seen_role=role_seen, + metadata=(owner, group, mode), + ): + already_all.add(path) + scanned += 1 if scanned >= cap: ul_notes.append(f"Reached file cap ({cap}) while scanning {root}.") return @@ -1428,6 +1330,7 @@ def harvest( extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] extra_role_name = "extra_paths" + extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) @@ -1453,39 +1356,18 @@ def harvest( if path in already_all: continue - if path_filter.is_excluded(path): - extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - extra_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, extra_role_name, path, src_rel) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - extra_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="user_include", - ) - ) - already_all.add(path) + if _capture_file( + bundle_dir=bundle_dir, + role_name=extra_role_name, + abs_path=path, + reason="user_include", + policy=policy, + path_filter=path_filter, + managed_out=extra_managed, + excluded_out=extra_excluded, + seen_role=extra_role_seen, + ): + already_all.add(path) extra_paths_snapshot = ExtraPathsSnapshot( role_name=extra_role_name, diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 6541ca9..680d390 100644 --- a/enroll/pathfilter.py +++ 
b/enroll/pathfilter.py @@ -141,7 +141,7 @@ class PathFilter: - Regex: prefix with 're:' or 'regex:' - Force glob: prefix with 'glob:' - A plain path without wildcards matches that path and everything under it - (directory-prefix behavior). + (directory-prefix behaviour). Examples: --exclude-path /usr/local/bin/docker-* diff --git a/pyproject.toml b/pyproject.toml index 3aa01d0..c7356bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.5" +version = "0.1.6" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index ed0a3c9..637dee1 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.5 +%global upstream_version 0.1.6 Name: enroll Version: %{upstream_version} @@ -44,6 +44,9 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- DRY up some code logic +- More test coverage +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Consolidate logrotate and cron files into their main service/package roles if they exist. - Standardise on MAX_FILES_CAP in one place - Manage apt stuff in its own role, not in etc_custom diff --git a/tests/test___main__.py b/tests/test___main__.py new file mode 100644 index 0000000..2e83ac1 --- /dev/null +++ b/tests/test___main__.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import runpy + + +def test_module_main_invokes_cli_main(monkeypatch): + import enroll.cli + + called = {"ok": False} + + def fake_main() -> None: + called["ok"] = True + + monkeypatch.setattr(enroll.cli, "main", fake_main) + + # Execute enroll.__main__ as if `python -m enroll`. 
+ runpy.run_module("enroll.__main__", run_name="__main__") + assert called["ok"] is True diff --git a/tests/test_accounts.py b/tests/test_accounts.py new file mode 100644 index 0000000..d5cc267 --- /dev/null +++ b/tests/test_accounts.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_parse_login_defs_parses_known_keys(tmp_path: Path): + from enroll.accounts import parse_login_defs + + p = tmp_path / "login.defs" + p.write_text( + """ + # comment + UID_MIN 1000 + UID_MAX 60000 + SYS_UID_MIN 100 + SYS_UID_MAX 999 + UID_MIN not_an_int + OTHER 123 + """, + encoding="utf-8", + ) + + vals = parse_login_defs(str(p)) + assert vals["UID_MIN"] == 1000 + assert vals["UID_MAX"] == 60000 + assert vals["SYS_UID_MIN"] == 100 + assert vals["SYS_UID_MAX"] == 999 + assert "OTHER" not in vals + + +def test_parse_passwd_and_group_and_ssh_files(tmp_path: Path): + from enroll.accounts import find_user_ssh_files, parse_group, parse_passwd + + passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "# comment", + "alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/usr/sbin/nologin", + "badline", + "cathy:x:notint:1000:Cathy:/home/cathy:/bin/bash", + "", + ] + ), + encoding="utf-8", + ) + + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "root:x:0:", + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "badgroup:x:notint:alice", + "", + ] + ), + encoding="utf-8", + ) + + rows = parse_passwd(str(passwd)) + assert ("alice", 1000, 1000, "Alice", "/home/alice", "/bin/bash") in rows + assert all(r[0] != "cathy" for r in rows) # skipped invalid UID + + gid_to_name, name_to_gid, members = parse_group(str(group)) + assert gid_to_name[1000] == "users" + assert name_to_gid["admins"] == 1002 + assert "alice" in members["admins"] + + # ssh discovery: only authorized_keys, no symlinks + home = tmp_path / "home" / "alice" + sshdir = home / ".ssh" + sshdir.mkdir(parents=True) + ak = sshdir / "authorized_keys" + ak.write_text("ssh-ed25519 AAA...", encoding="utf-8") + # a symlink should be ignored + (sshdir / "authorized_keys2").write_text("x", encoding="utf-8") + os.symlink(str(sshdir / "authorized_keys2"), str(sshdir / "authorized_keys_link")) + assert find_user_ssh_files(str(home)) == [str(ak)] + + +def test_collect_non_system_users(monkeypatch, tmp_path: Path): + import enroll.accounts as a + + orig_parse_login_defs = a.parse_login_defs + orig_parse_passwd = a.parse_passwd + orig_parse_group = a.parse_group + + # Provide controlled passwd/group/login.defs inputs via monkeypatch. 
+ passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin", + "alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "sysuser:x:200:200:Sys:/home/sys:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/bin/false", + "", + ] + ), + encoding="utf-8", + ) + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "", + ] + ), + encoding="utf-8", + ) + + defs = tmp_path / "login.defs" + defs.write_text("UID_MIN 1000\n", encoding="utf-8") + + monkeypatch.setattr( + a, "parse_login_defs", lambda path=str(defs): orig_parse_login_defs(path) + ) + monkeypatch.setattr( + a, "parse_passwd", lambda path=str(passwd): orig_parse_passwd(path) + ) + monkeypatch.setattr( + a, "parse_group", lambda path=str(group): orig_parse_group(path) + ) + + # Use a stable fake ssh discovery. + monkeypatch.setattr( + a, "find_user_ssh_files", lambda home: [f"{home}/.ssh/authorized_keys"] + ) + + users = a.collect_non_system_users() + assert [u.name for u in users] == ["alice"] + u = users[0] + assert u.primary_group == "users" + assert u.supplementary_groups == ["admins"] + assert u.ssh_files == ["/home/alice/.ssh/authorized_keys"] diff --git a/tests/test_debian.py b/tests/test_debian.py new file mode 100644 index 0000000..333afc1 --- /dev/null +++ b/tests/test_debian.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import hashlib +from pathlib import Path + + +def test_dpkg_owner_parses_output(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd[:2] == ["dpkg", "-S"] + return P( + 0, + """ + diversion by foo from: /etc/something + nginx-common:amd64: /etc/nginx/nginx.conf + nginx-common, nginx: /etc/nginx/sites-enabled/default + """, + ) + + monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.dpkg_owner("/etc/nginx/nginx.conf") == "nginx-common" + + def fake_run_none(cmd, text, capture_output): + return P(1, "") + + monkeypatch.setattr(d.subprocess, "run", fake_run_none) + assert d.dpkg_owner("/missing") is None + + +def test_list_manual_packages_parses_and_sorts(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd == ["apt-mark", "showmanual"] + return P(0, "\n# comment\nnginx\nvim\nnginx\n") + + monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.list_manual_packages() == ["nginx", "vim"] + + +def test_build_dpkg_etc_index(tmp_path: Path): + import enroll.debian as d + + info = tmp_path / "info" + info.mkdir() + (info / "nginx.list").write_text( + "/etc/nginx/nginx.conf\n/etc/nginx/sites-enabled/default\n/usr/bin/nginx\n", + encoding="utf-8", + ) + (info / "vim:amd64.list").write_text( + "/etc/vim/vimrc\n/usr/bin/vim\n", + encoding="utf-8", + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = d.build_dpkg_etc_index(str(info)) + assert "/etc/nginx/nginx.conf" in owned + assert owner_map["/etc/nginx/nginx.conf"] == "nginx" + assert "nginx" in topdir_to_pkgs + assert topdir_to_pkgs["nginx"] == {"nginx"} + assert pkg_to_etc["vim"] == ["/etc/vim/vimrc"] + + +def test_parse_status_conffiles_handles_continuations(tmp_path: Path): + import enroll.debian as d + + status = tmp_path / "status" + status.write_text( + 
"\n".join( + [ + "Package: nginx", + "Version: 1", + "Conffiles:", + " /etc/nginx/nginx.conf abcdef", + " /etc/nginx/mime.types 123456", + "", + "Package: other", + "Version: 2", + "", + ] + ), + encoding="utf-8", + ) + m = d.parse_status_conffiles(str(status)) + assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" + assert m["nginx"]["/etc/nginx/mime.types"] == "123456" + assert "other" not in m + + +def test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): + import enroll.debian as d + + # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. + md5_file = tmp_path / "pkg.md5sums" + md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") + + def fake_exists(path: str) -> bool: + return path.endswith("/var/lib/dpkg/info/p1.md5sums") + + real_open = open + + def fake_open(path: str, *args, **kwargs): + if path.endswith("/var/lib/dpkg/info/p1.md5sums"): + return real_open(md5_file, *args, **kwargs) + return real_open(path, *args, **kwargs) + + monkeypatch.setattr(d.os.path, "exists", fake_exists) + monkeypatch.setattr("builtins.open", fake_open) + + m = d.read_pkg_md5sums("p1") + assert m == {"etc/foo.conf": "0123456789abcdef"} + + content = b"hello world\n" + p = tmp_path / "x" + p.write_bytes(content) + assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() + + +def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): + import enroll.debian as d + import sys + + p = tmp_path / "f" + p.write_text("x", encoding="utf-8") + + class FakePwdMod: + @staticmethod + def getpwuid(_): # pragma: no cover + raise KeyError + + class FakeGrpMod: + @staticmethod + def getgrgid(_): # pragma: no cover + raise KeyError + + # stat_triplet imports pwd/grp inside the function, so patch sys.modules. + monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) + monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) + owner, group, mode = d.stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_diff_bundle.py b/tests/test_diff_bundle.py new file mode 100644 index 0000000..66ef094 --- /dev/null +++ b/tests/test_diff_bundle.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import os +import tarfile +from pathlib import Path + +import pytest + + +def _make_bundle_dir(tmp_path: Path) -> Path: + b = tmp_path / "bundle" + (b / "artifacts").mkdir(parents=True) + (b / "state.json").write_text("{}\n", encoding="utf-8") + return b + + +def _tar_gz_of_dir(src: Path, out: Path) -> None: + with tarfile.open(out, mode="w:gz") as tf: + # tar -C src . 
semantics + for p in src.rglob("*"): + rel = p.relative_to(src) + tf.add(p, arcname=str(rel)) + + +def test_bundle_from_directory_and_statejson_path(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + + br1 = d._bundle_from_input(str(b), sops_mode=False) + assert br1.dir == b + assert br1.state_path.exists() + + br2 = d._bundle_from_input(str(b / "state.json"), sops_mode=False) + assert br2.dir == b + + +def test_bundle_from_tarball_extracts(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tgz" + _tar_gz_of_dir(b, tgz) + + br = d._bundle_from_input(str(tgz), sops_mode=False) + try: + assert br.dir.is_dir() + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_sops_like_file(monkeypatch, tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tar.gz" + _tar_gz_of_dir(b, tgz) + + # Pretend the tarball is an encrypted bundle by giving it a .sops name. + sops_path = tmp_path / "bundle.tar.gz.sops" + sops_path.write_bytes(tgz.read_bytes()) + + # Stub out sops machinery: "decrypt" just copies through. + monkeypatch.setattr(d, "require_sops_cmd", lambda: "sops") + + def fake_decrypt(src: Path, dest: Path, mode: int): + dest.write_bytes(Path(src).read_bytes()) + try: + os.chmod(dest, mode) + except OSError: + pass + + monkeypatch.setattr(d, "decrypt_file_binary_to", fake_decrypt) + + br = d._bundle_from_input(str(sops_path), sops_mode=False) + try: + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_input_missing_path(tmp_path: Path): + import enroll.diff as d + + with pytest.raises(RuntimeError, match="not found"): + d._bundle_from_input(str(tmp_path / "nope"), sops_mode=False) diff --git a/tests/test_pathfilter.py b/tests/test_pathfilter.py new file mode 100644 index 0000000..406b7e7 --- /dev/null +++ b/tests/test_pathfilter.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_compile_and_match_prefix_glob_and_regex(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern + + # prefix semantics: matches the exact path and subtree + p = compile_path_pattern("/etc/nginx") + assert p.kind == "prefix" + assert p.matches("/etc/nginx") + assert p.matches("/etc/nginx/nginx.conf") + assert not p.matches("/etc/nginx2/nginx.conf") + + # glob semantics + g = compile_path_pattern("/etc/**/*.conf") + assert g.kind == "glob" + assert g.matches("/etc/nginx/nginx.conf") + assert not g.matches("/var/etc/nginx.conf") + + # explicit glob + g2 = compile_path_pattern("glob:/home/*/.bashrc") + assert g2.kind == "glob" + assert g2.matches("/home/alice/.bashrc") + + # regex semantics (search, not match) + r = compile_path_pattern(r"re:/home/[^/]+/\.ssh/authorized_keys$") + assert r.kind == "regex" + assert r.matches("/home/alice/.ssh/authorized_keys") + assert not r.matches("/home/alice/.ssh/authorized_keys2") + + # invalid regex: never matches + bad = compile_path_pattern("re:[") + assert bad.kind == "regex" + assert not bad.matches("/etc/passwd") + + # exclude wins + pf = PathFilter(exclude=["/etc/nginx"], include=["/etc/nginx/nginx.conf"]) + assert pf.is_excluded("/etc/nginx/nginx.conf") + + +def test_expand_includes_respects_exclude_symlinks_and_caps(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern, expand_includes + + root = tmp_path / "root" 
+ (root / "a").mkdir(parents=True) + (root / "a" / "one.txt").write_text("1", encoding="utf-8") + (root / "a" / "two.txt").write_text("2", encoding="utf-8") + (root / "b").mkdir() + (root / "b" / "secret.txt").write_text("s", encoding="utf-8") + + # symlink file should be ignored + os.symlink(str(root / "a" / "one.txt"), str(root / "a" / "link.txt")) + + exclude = PathFilter(exclude=[str(root / "b")]) + + pats = [ + compile_path_pattern(str(root / "a")), + compile_path_pattern("glob:" + str(root / "**" / "*.txt")), + ] + + paths, notes = expand_includes(pats, exclude=exclude, max_files=2) + # cap should limit to 2 files + assert len(paths) == 2 + assert any("cap" in n.lower() for n in notes) + # excluded dir should not contribute + assert all("/b/" not in p for p in paths) + # symlink ignored + assert all(not p.endswith("link.txt") for p in paths) + + +def test_expand_includes_notes_on_no_matches(tmp_path: Path): + from enroll.pathfilter import compile_path_pattern, expand_includes + + pats = [compile_path_pattern(str(tmp_path / "does_not_exist"))] + paths, notes = expand_includes(pats, max_files=10) + assert paths == [] + assert any("matched no files" in n.lower() for n in notes) diff --git a/tests/test_remote.py b/tests/test_remote.py new file mode 100644 index 0000000..576c0b1 --- /dev/null +++ b/tests/test_remote.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import tarfile +from pathlib import Path + +import pytest + + +def _make_tgz_bytes(files: dict[str, bytes]) -> bytes: + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + for name, content in files.items(): + ti = tarfile.TarInfo(name=name) + ti.size = len(content) + tf.addfile(ti, io.BytesIO(content)) + return bio.getvalue() + + +def test_safe_extract_tar_rejects_path_traversal(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + # Build an unsafe tar with ../ traversal + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="../evil") + ti.size = 1 + tf.addfile(ti, io.BytesIO(b"x")) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Unsafe tar member path"): + _safe_extract_tar(tf, tmp_path) + + +def test_safe_extract_tar_rejects_symlinks(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="link") + ti.type = tarfile.SYMTYPE + ti.linkname = "/etc/passwd" + tf.addfile(ti) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Refusing to extract"): + _safe_extract_tar(tf, tmp_path) + + +def test_remote_harvest_happy_path(tmp_path: Path, monkeypatch): + import sys + + import enroll.remote as r + + # Avoid building a real zipapp; just create a file. + def fake_build(_td: Path) -> Path: + p = _td / "enroll.pyz" + p.write_bytes(b"PYZ") + return p + + monkeypatch.setattr(r, "_build_enroll_pyz", fake_build) + + # Prepare a tiny harvest bundle tar stream from the "remote". 
+ tgz = _make_tgz_bytes({"state.json": b'{"ok": true}\n'}) + + calls: list[str] = [] + + class _Chan: + def __init__(self, rc: int = 0): + self._rc = rc + + def recv_exit_status(self) -> int: + return self._rc + + class _Stdout: + def __init__(self, payload: bytes = b"", rc: int = 0): + self._bio = io.BytesIO(payload) + self.channel = _Chan(rc) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _Stderr: + def __init__(self, payload: bytes = b""): + self._bio = io.BytesIO(payload) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _SFTP: + def __init__(self): + self.put_calls: list[tuple[str, str]] = [] + + def put(self, local: str, remote: str) -> None: + self.put_calls.append((local, remote)) + + def close(self) -> None: + return + + class FakeSSH: + def __init__(self): + self._sftp = _SFTP() + + def load_system_host_keys(self): + return + + def set_missing_host_key_policy(self, _policy): + return + + def connect(self, **kwargs): + # Accept any connect parameters. + return + + def open_sftp(self): + return self._sftp + + def exec_command(self, cmd: str): + calls.append(cmd) + # The tar stream uses exec_command directly. + if cmd.startswith("tar -cz -C"): + return (None, _Stdout(tgz, rc=0), _Stderr(b"")) + + # _ssh_run path: id -un, mktemp -d, chmod, sudo harvest, sudo chown, rm -rf + if cmd == "id -un": + return (None, _Stdout(b"alice\n"), _Stderr()) + if cmd == "mktemp -d": + return (None, _Stdout(b"/tmp/enroll-remote-123\n"), _Stderr()) + if cmd.startswith("chmod 700"): + return (None, _Stdout(b""), _Stderr()) + if " harvest " in cmd: + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("sudo chown -R"): + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("rm -rf"): + return (None, _Stdout(b""), _Stderr()) + + return (None, _Stdout(b""), _Stderr(b"unknown")) + + def close(self): + return + + import types + + class RejectPolicy: + pass + + FakeParamiko = types.SimpleNamespace(SSHClient=FakeSSH, RejectPolicy=RejectPolicy) + + # Provide a fake paramiko module. + monkeypatch.setitem(sys.modules, "paramiko", FakeParamiko) + + out_dir = tmp_path / "out" + state_path = r.remote_harvest( + local_out_dir=out_dir, + remote_host="example.com", + remote_port=2222, + remote_user=None, + include_paths=["/etc/nginx/nginx.conf"], + exclude_paths=["/etc/shadow"], + dangerous=True, + no_sudo=False, + ) + + assert state_path == out_dir / "state.json" + assert state_path.exists() + assert b"ok" in state_path.read_bytes() + + # Ensure we attempted remote harvest with sudo and passed include/exclude and dangerous. 
+ joined = "\n".join(calls) + assert "sudo" in joined + assert "--dangerous" in joined + assert "--include-path" in joined + assert "--exclude-path" in joined diff --git a/tests/test_systemd.py b/tests/test_systemd.py new file mode 100644 index 0000000..f351159 --- /dev/null +++ b/tests/test_systemd.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import pytest + + +def test_list_enabled_services_and_timers_filters_templates(monkeypatch): + import enroll.systemd as s + + def fake_run(cmd: list[str]) -> str: + if "--type=service" in cmd: + return "\n".join( + [ + "nginx.service enabled", + "getty@.service enabled", # template + "foo@bar.service enabled", # instance units are included + "ssh.service enabled", + ] + ) + if "--type=timer" in cmd: + return "\n".join( + [ + "apt-daily.timer enabled", + "foo@.timer enabled", # template + ] + ) + raise AssertionError("unexpected") + + monkeypatch.setattr(s, "_run", fake_run) + assert s.list_enabled_services() == [ + "foo@bar.service", + "nginx.service", + "ssh.service", + ] + assert s.list_enabled_timers() == ["apt-daily.timer"] + + +def test_get_unit_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + assert cmd[0:2] == ["systemctl", "show"] + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/nginx.service", + "DropInPaths=/etc/systemd/system/nginx.service.d/override.conf /etc/systemd/system/nginx.service.d/extra.conf", + "EnvironmentFiles=-/etc/default/nginx /etc/nginx/env", + "ExecStart={ path=/usr/sbin/nginx ; argv[]=/usr/sbin/nginx -g daemon off; }", + "ActiveState=active", + "SubState=running", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ui = s.get_unit_info("nginx.service") + assert ui.fragment_path == "/lib/systemd/system/nginx.service" + assert "/etc/default/nginx" in ui.env_files + assert "/etc/nginx/env" in ui.env_files + assert "/usr/sbin/nginx" in ui.exec_paths + assert ui.active_state == "active" + + +def test_get_unit_info_raises_unit_query_error(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + return P(1, "", "no such unit") + + monkeypatch.setattr(s.subprocess, "run", fake_run) + with pytest.raises(s.UnitQueryError) as ei: + s.get_unit_info("missing.service") + assert "missing.service" in str(ei.value) + assert ei.value.unit == "missing.service" + + +def test_get_timer_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, text, capture_output): + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/apt-daily.timer", + "DropInPaths=", + "EnvironmentFiles=-/etc/default/apt", + "Unit=apt-daily.service", + "ActiveState=active", + "SubState=waiting", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ti = s.get_timer_info("apt-daily.timer") + assert ti.trigger_unit == "apt-daily.service" + assert "/etc/default/apt" in ti.env_files From 8c19473e18b388b95ac3a5f77942cd081c17e889 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: 
Sun, 28 Dec 2025 18:37:14 +1100 Subject: [PATCH 15/69] Fix an attribution bug for certain files ending up in the wrong package/role. --- CHANGELOG.md | 4 ++ debian/changelog | 6 +++ enroll/harvest.py | 44 +++++++++++++++-- pyproject.toml | 2 +- rpm/enroll.spec | 4 +- tests/test_harvest.py | 107 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 160 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4c39d..f2cb109 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.1.7 + + * Fix an attribution bug for certain files ending up in the wrong package/role. + # 0.1.6 * DRY up some code logic diff --git a/debian/changelog b/debian/changelog index a15c38a..eabdefc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +enroll (0.1.7) unstable; urgency=medium + + * Fix an attribution bug for certain files ending up in the wrong package/role. + + -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 + enroll (0.1.6) unstable; urgency=medium * DRY up some code logic diff --git a/enroll/harvest.py b/enroll/harvest.py index 56e5aed..d678b89 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -292,9 +292,26 @@ def _hint_names(unit: str, pkgs: Set[str]) -> Set[str]: def _add_pkgs_from_etc_topdirs( hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str] ) -> None: + """Expand a service's package set using dpkg-owned /etc top-level dirs. + + This is a heuristic: many Debian packages split a service across multiple + packages (e.g. nginx + nginx-common) while sharing a single /etc/ + tree. + + We intentionally *avoid* using shared trees (e.g. /etc/cron.d, /etc/ssl, + /etc/apparmor.d) to expand package sets, because many unrelated packages + legitimately install files there. + + We also consider the common ".d" variant (e.g. hint "apparmor" -> + topdir "apparmor.d") so we can explicitly skip known shared trees. + """ + for h in hints: - for p in topdir_to_pkgs.get(h, set()): - pkgs.add(p) + for top in (h, f"{h}.d"): + if top in SHARED_ETC_TOPDIRS: + continue + for p in topdir_to_pkgs.get(top, set()): + pkgs.add(p) def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: @@ -1132,10 +1149,27 @@ def harvest( pkg = dpkg_owner(path) if pkg: - svc_roles = pkg_to_service_roles.get(pkg) + svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: - # Deterministic tie-break: lowest role name. - return (sorted(set(svc_roles))[0], tag) + # If multiple service roles reference the same package, prefer + # the role that most closely matches the snippet name (basename + # or stem). This avoids surprising attributions such as an + # AppArmor loader role "claiming" a cron/logrotate snippet + # that is clearly named after another package/service. + if len(svc_roles) > 1: + # Direct role-name matches first. + for c in [pkg, *uniq]: + rn = _safe_name(c) + if rn in svc_roles: + return (rn, tag) + # Next, use the alias map if it points at one of the roles. + for c in [pkg, *uniq]: + hit = alias_ranked.get(_safe_name(c)) + if hit is not None and hit[1] in svc_roles: + return (hit[1], tag) + + # Deterministic fallback: lowest role name. 
+ return (svc_roles[0], tag) pkg_role = pkg_name_to_role.get(pkg) if pkg_role: return (pkg_role, tag) diff --git a/pyproject.toml b/pyproject.toml index c7356bc..ca875e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.6" +version = "0.1.7" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 637dee1..f63a12c 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.6 +%global upstream_version 0.1.7 Name: enroll Version: %{upstream_version} @@ -44,6 +44,8 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Fix an attribution bug for certain files ending up in the wrong package/role. +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - DRY up some code logic - More test coverage * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a832c81..fa796f0 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -176,3 +176,110 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) + + +def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( + monkeypatch, tmp_path: Path +): + """Regression test for shared snippet routing. + + When multiple service roles reference the same owning package, we prefer the + role whose name matches the snippet/package (e.g. ntpsec) rather than a + lexicographic tie-break that could incorrectly pick another role. + """ + + bundle = tmp_path / "bundle" + + files = {"/etc/cron.d/ntpsec": b"# cron\n"} + dirs = {"/etc", "/etc/cron.d"} + + monkeypatch.setattr(h.os.path, "isfile", lambda p: p in files) + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isdir", lambda p: p in dirs) + monkeypatch.setattr(h.os.path, "exists", lambda p: p in files or p in dirs) + monkeypatch.setattr(h.os, "walk", lambda root: [("/etc/cron.d", [], ["ntpsec"])]) + + # Only include the cron snippet in the system capture set. + monkeypatch.setattr( + h, "_iter_system_capture_paths", lambda: [("/etc/cron.d/ntpsec", "system_cron")] + ) + + monkeypatch.setattr( + h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] + ) + + def fake_unit_info(unit: str) -> UnitInfo: + if unit == "apparmor.service": + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/apparmor.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/apparmor"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/ntpsec.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/ntpd"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + + monkeypatch.setattr(h, "get_unit_info", fake_unit_info) + + # Dpkg /etc index: no owned /etc paths needed for this test. 
+ monkeypatch.setattr( + h, + "build_dpkg_etc_index", + lambda: (set(), {}, {}, {}), + ) + monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) + monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) + monkeypatch.setattr(h, "file_md5", lambda path: "x") + monkeypatch.setattr(h, "list_manual_packages", lambda: []) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) + + # Make apparmor *also* claim the ntpsec package (simulates overly-broad + # package inference). The snippet routing should still prefer role 'ntpsec'. + def fake_dpkg_owner(p: str): + if p == "/etc/cron.d/ntpsec": + return "ntpsec" + if "apparmor" in p: + return "ntpsec" # intentionally misleading + if "ntpsec" in p or "ntpd" in p: + return "ntpsec" + return None + + monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + + def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): + dst = Path(bundle_dir) / "artifacts" / role_name / src_rel + dst.parent.mkdir(parents=True, exist_ok=True) + dst.write_bytes(files[abs_path]) + + monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) + + class AllowAll: + def deny_reason(self, path: str): + return None + + state_path = h.harvest(str(bundle), policy=AllowAll()) + st = json.loads(Path(state_path).read_text(encoding="utf-8")) + + # Cron snippet should end up attached to the ntpsec role, not apparmor. + svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) + + svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + assert all( + mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] + ) From ad2abed6127989e62a639874f861acbfaf2e9915 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:29:11 +1100 Subject: [PATCH 16/69] Add version CLI arg --- CHANGELOG.md | 4 ++++ enroll/cli.py | 61 ++++++++++++++++++++++++++++------------------- enroll/version.py | 32 +++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 enroll/version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f2cb109..e07f57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.0 + + * Add version CLI arg + # 0.1.7 * Fix an attribution bug for certain files ending up in the wrong package/role. 
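The cli.py diff below wires the new flag through argparse's built-in `version` action, which prints the supplied string and exits immediately, so `enroll --version` short-circuits before the required-subcommand check. A standalone sketch of that behaviour (the version string here is a placeholder, not enroll's real lookup):

```python
import argparse

ap = argparse.ArgumentParser(prog="enroll")
# action="version" prints the string to stdout and raises SystemExit(0)
# before any other argument handling, including required subcommands.
ap.add_argument("-v", "--version", action="version", version="0.2.0")

ap.parse_args(["--version"])  # prints "0.2.0" and exits
```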
diff --git a/enroll/cli.py b/enroll/cli.py index ae9aba0..bb4d3f1 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -15,6 +15,7 @@ from .harvest import harvest from .manifest import manifest from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +from .version import get_enroll_version def _discover_config_path(argv: list[str]) -> Optional[Path]: @@ -318,13 +319,6 @@ def _jt_mode(args: argparse.Namespace) -> str: return "auto" -def _add_remote_args(p: argparse.ArgumentParser) -> None: - p.add_argument( - "--remote-host", - help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", - ) - - def _add_config_args(p: argparse.ArgumentParser) -> None: p.add_argument( "-c", @@ -339,6 +333,13 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: action="store_true", help="Do not load any INI config file (even if one would be auto-discovered).", ) + + +def _add_remote_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "--remote-host", + help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", + ) p.add_argument( "--remote-port", type=int, @@ -354,11 +355,18 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: def main() -> None: ap = argparse.ArgumentParser(prog="enroll") + ap.add_argument( + "-v", + "--version", + action="version", + version=f"{get_enroll_version()}", + ) _add_config_args(ap) sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") _add_config_args(h) + _add_remote_args(h) h.add_argument( "--out", help=( @@ -406,7 +414,6 @@ def main() -> None: action="store_true", help="Don't use sudo on the remote host (when using --remote options). This may result in a limited harvest due to permission restrictions.", ) - _add_remote_args(h) m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") _add_config_args(m) @@ -443,6 +450,7 @@ def main() -> None: "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) _add_config_args(s) + _add_remote_args(s) s.add_argument( "--harvest", help=( @@ -500,7 +508,6 @@ def main() -> None: ), ) _add_common_manifest_args(s) - _add_remote_args(s) d = sub.add_parser("diff", help="Compare two harvests and report differences") _add_config_args(d) @@ -602,14 +609,12 @@ def main() -> None: ) args = ap.parse_args(argv) - remote_host: Optional[str] = getattr(args, "remote_host", None) - try: if args.cmd == "harvest": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.out, hint=remote_host) + out_file = _resolve_sops_out_file(args.out, hint=args.remote_host) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -619,7 +624,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -635,11 +640,11 @@ def main() -> None: out_dir = ( Path(args.out) if args.out - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) state = remote_harvest( local_out_dir=out_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -669,12 +674,16 @@ def 
main() -> None: ) print(str(out_file)) else: - if not args.out: - raise SystemExit( - "error: --out is required unless --remote-host is set" + if args.out: + out_dir = args.out + else: + out_dir = ( + Path(args.out) + if args.out + else new_harvest_cache_dir(hint=args.remote_host).dir ) path = harvest( - args.out, + out_dir, dangerous=bool(args.dangerous), include_paths=list(getattr(args, "include_path", []) or []), exclude_paths=list(getattr(args, "exclude_path", []) or []), @@ -747,9 +756,11 @@ def main() -> None: raise SystemExit(2) elif args.cmd == "single-shot": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.harvest, hint=remote_host) + out_file = _resolve_sops_out_file( + args.harvest, hint=args.remote_host + ) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -759,7 +770,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -784,11 +795,11 @@ def main() -> None: harvest_dir = ( Path(args.harvest) if args.harvest - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) remote_harvest( local_out_dir=harvest_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), diff --git a/enroll/version.py b/enroll/version.py new file mode 100644 index 0000000..bbe78b6 --- /dev/null +++ b/enroll/version.py @@ -0,0 +1,32 @@ +from __future__ import annotations + + +def get_enroll_version() -> str: + """ + Best-effort version lookup that works when installed via: + - poetry/pip/wheel + - deb/rpm system packages + Falls back to "0+unknown" when running from an unpacked source tree. 
+ """ + try: + from importlib.metadata import ( + packages_distributions, + version, + ) + except Exception: + # Very old Python or unusual environment + return "unknown" + + # Map import package -> dist(s) + dist_names = [] + try: + dist_names = (packages_distributions() or {}).get("enroll", []) or [] + except Exception: + dist_names = [] + + # Try mapped dists first, then a reasonable default + for dist in [*dist_names, "enroll"]: + try: + return version(dist) + except Exception: + return "unknown" From 984b0fa81b5b224951816c4dc46a74734b950d07 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:59:34 +1100 Subject: [PATCH 17/69] Add ability to enroll RH-style systems (DNF5/DNF/RPM) --- CHANGELOG.md | 1 + README.md | 18 +-- enroll/debian.py | 26 ---- enroll/fsutil.py | 40 ++++++ enroll/harvest.py | 272 ++++++++++++++++++++++++++--------------- enroll/ignore.py | 1 + enroll/manifest.py | 229 ++++++++++++++++++++++++++++++---- enroll/platform.py | 261 +++++++++++++++++++++++++++++++++++++++ enroll/rpm.py | 266 ++++++++++++++++++++++++++++++++++++++++ tests/test_debian.py | 56 --------- tests/test_fsutil.py | 25 ++++ tests/test_harvest.py | 142 +++++++++++++++------ tests/test_manifest.py | 93 ++++++++++++++ tests/test_platform.py | 93 ++++++++++++++ tests/test_rpm.py | 131 ++++++++++++++++++++ 15 files changed, 1400 insertions(+), 254 deletions(-) create mode 100644 enroll/fsutil.py create mode 100644 enroll/platform.py create mode 100644 enroll/rpm.py create mode 100644 tests/test_fsutil.py create mode 100644 tests/test_platform.py create mode 100644 tests/test_rpm.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e07f57b..f92e0b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.0 * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) # 0.1.7 diff --git a/README.md b/README.md index c6b8123..d075951 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ Enroll logo -**enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. +**enroll** inspects a Linux machine (Debian-like or RedHat-like) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. - Detects packages that have been installed. -- Detects Debian package ownership of `/etc` files using dpkg’s local database. -- Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). +- Detects package ownership of `/etc` files where possible +- Captures config that has **changed from packaged defaults** where possible (e.g dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). - Defensively excludes likely secrets (path denylist + content sniff + size caps). - Captures non-system users and their SSH public keys. -- Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. +- Captures miscellaneous `/etc` files it can't attribute to a package and installs them in an `etc_custom` role. - Ditto for /usr/local/bin (for non-binary files) and /usr/local/etc - Avoids trying to start systemd services that were detected as inactive during harvest. @@ -41,8 +41,8 @@ Use when enrolling **one server** (or generating a “golden” role set you int **Characteristics** - Roles are more self-contained. -- Raw config files live in the role’s `files/`. 
-- Template variables live in the role’s `defaults/main.yml`. +- Raw config files live in the role's `files/`. +- Template variables live in the role's `defaults/main.yml`. ### Multi-site mode (`--fqdn`) Use when enrolling **several existing servers** quickly, especially if they differ. @@ -68,13 +68,13 @@ Harvest state about a host and write a harvest bundle. - “Manual” packages - Changed-from-default config (plus related custom/unowned files under service dirs) - Non-system users + SSH public keys -- Misc `/etc` that can’t be attributed to a package (`etc_custom` role) +- Misc `/etc` that can't be attributed to a package (`etc_custom` role) - Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time) **Common flags** - Remote harvesting: - `--remote-host`, `--remote-user`, `--remote-port` - - `--no-sudo` (if you don’t want/need sudo) + - `--no-sudo` (if you don't want/need sudo) - Sensitive-data behaviour: - default: tries to avoid likely secrets - `--dangerous`: disables secret-safety checks (see “Sensitive data” below) @@ -233,7 +233,7 @@ poetry run enroll --help ## Found a bug / have a suggestion? -My Forgejo doesn’t currently support federation, so I haven’t opened registration/login for issues. +My Forgejo doesn't currently support federation, so I haven't opened registration/login for issues. Instead, email me (see `pyproject.toml`) or contact me on the Fediverse: diff --git a/enroll/debian.py b/enroll/debian.py index 0ddc1f3..7e1ee2d 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -1,7 +1,6 @@ from __future__ import annotations import glob -import hashlib import os import subprocess # nosec from typing import Dict, List, Optional, Set, Tuple @@ -180,28 +179,3 @@ def read_pkg_md5sums(pkg: str) -> Dict[str, str]: md5, rel = line.split(None, 1) m[rel.strip()] = md5.strip() return m - - -def file_md5(path: str) -> str: - h = hashlib.md5() # nosec - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(1024 * 1024), b""): - h.update(chunk) - return h.hexdigest() - - -def stat_triplet(path: str) -> Tuple[str, str, str]: - st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) - - import pwd, grp - - try: - owner = pwd.getpwuid(st.st_uid).pw_name - except KeyError: - owner = str(st.st_uid) - try: - group = grp.getgrgid(st.st_gid).gr_name - except KeyError: - group = str(st.st_gid) - return owner, group, mode diff --git a/enroll/fsutil.py b/enroll/fsutil.py new file mode 100644 index 0000000..3d18df6 --- /dev/null +++ b/enroll/fsutil.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import hashlib +import os +from typing import Tuple + + +def file_md5(path: str) -> str: + """Return hex MD5 of a file. + + Used for Debian dpkg baseline comparisons. + """ + h = hashlib.md5() # nosec + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def stat_triplet(path: str) -> Tuple[str, str, str]: + """Return (owner, group, mode) for a path. + + owner/group are usernames/group names when resolvable, otherwise numeric ids. + mode is a zero-padded octal string (e.g. "0644"). 
+ """ + st = os.stat(path, follow_symlinks=True) + mode = oct(st.st_mode & 0o777)[2:].zfill(4) + + import grp + import pwd + + try: + owner = pwd.getpwuid(st.st_uid).pw_name + except KeyError: + owner = str(st.st_uid) + try: + group = grp.getgrgid(st.st_gid).gr_name + except KeyError: + group = str(st.st_gid) + return owner, group, mode diff --git a/enroll/harvest.py b/enroll/harvest.py index d678b89..bb706b1 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -15,18 +15,12 @@ from .systemd import ( get_timer_info, UnitQueryError, ) -from .debian import ( - build_dpkg_etc_index, - dpkg_owner, - file_md5, - list_manual_packages, - parse_status_conffiles, - read_pkg_md5sums, - stat_triplet, -) +from .fsutil import stat_triplet +from .platform import detect_platform, get_backend from .ignore import IgnorePolicy from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users +from .version import get_enroll_version @dataclass @@ -85,6 +79,14 @@ class AptConfigSnapshot: notes: List[str] +@dataclass +class DnfConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -158,6 +160,13 @@ SHARED_ETC_TOPDIRS = { "sudoers.d", "sysctl.d", "systemd", + # RPM-family shared trees + "dnf", + "yum", + "yum.repos.d", + "sysconfig", + "pki", + "firewalld", } @@ -314,17 +323,23 @@ def _add_pkgs_from_etc_topdirs( pkgs.add(p) -def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: - paths: List[str] = [] - for h in hints: - paths.extend( - [ - f"/etc/default/{h}", - f"/etc/init.d/{h}", - f"/etc/sysctl.d/{h}.conf", - ] - ) - return paths +def _maybe_add_specific_paths(hints: Set[str], backend) -> List[str]: + # Delegate to backend-specific conventions (e.g. /etc/default on Debian, + # /etc/sysconfig on Fedora/RHEL). Always include sysctl.d. + try: + return backend.specific_paths_for_hints(hints) + except Exception: + # Best-effort fallback (Debian-ish). 
+ paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths def _scan_unowned_under_roots( @@ -408,6 +423,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/anacron/*", "system_cron"), ("/var/spool/cron/crontabs/*", "system_cron"), ("/var/spool/crontabs/*", "system_cron"), + ("/var/spool/cron/*", "system_cron"), # network ("/etc/netplan/*", "system_network"), ("/etc/systemd/network/*", "system_network"), @@ -415,6 +431,9 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/network/interfaces.d/*", "system_network"), ("/etc/resolvconf.conf", "system_network"), ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + ("/etc/NetworkManager/system-connections/*", "system_network"), + ("/etc/sysconfig/network*", "system_network"), + ("/etc/sysconfig/network-scripts/*", "system_network"), # firewall ("/etc/nftables.conf", "system_firewall"), ("/etc/nftables.d/*", "system_firewall"), @@ -422,6 +441,10 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/iptables/rules.v6", "system_firewall"), ("/etc/ufw/*", "system_firewall"), ("/etc/default/ufw", "system_firewall"), + ("/etc/firewalld/*", "system_firewall"), + ("/etc/firewalld/zones/*", "system_firewall"), + # SELinux + ("/etc/selinux/config", "system_security"), # other ("/etc/rc.local", "system_rc"), ] @@ -553,6 +576,51 @@ def _iter_apt_capture_paths() -> List[tuple[str, str]]: return uniq +def _iter_dnf_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for DNF/YUM configuration on RPM systems. + + Captures: + - /etc/dnf/* (dnf.conf, vars, plugins, modules, automatic) + - /etc/yum.conf (legacy) + - /etc/yum.repos.d/*.repo + - /etc/pki/rpm-gpg/* (GPG key files) + """ + reasons: Dict[str, str] = {} + + for root, tag in ( + ("/etc/dnf", "dnf_config"), + ("/etc/yum", "yum_config"), + ): + if os.path.isdir(root): + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, tag) + + # Legacy yum.conf. + if os.path.isfile("/etc/yum.conf") and not os.path.islink("/etc/yum.conf"): + reasons.setdefault("/etc/yum.conf", "yum_conf") + + # Repositories. + if os.path.isdir("/etc/yum.repos.d"): + for p in _iter_matching_files("/etc/yum.repos.d/*.repo"): + reasons[p] = "yum_repo" + + # RPM GPG keys. + if os.path.isdir("/etc/pki/rpm-gpg"): + for dirpath, _, filenames in os.walk("/etc/pki/rpm-gpg"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "rpm_gpg_key") + + # Stable ordering. 
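+    # Example result shape (paths hypothetical):
+    #   [("/etc/dnf/dnf.conf", "dnf_config"),
+    #    ("/etc/yum.repos.d/fedora.repo", "yum_repo")]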
+ return [(p, reasons[p]) for p in sorted(reasons.keys())] + + def _iter_system_capture_paths() -> List[tuple[str, str]]: """Return (path, reason) pairs for essential system config/state (non-APT).""" out: List[tuple[str, str]] = [] @@ -600,8 +668,12 @@ def harvest( flush=True, ) - owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = build_dpkg_etc_index() - conffiles_by_pkg = parse_status_conffiles() + platform = detect_platform() + backend = get_backend(platform) + + owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = ( + backend.build_etc_index() + ) # ------------------------- # Service roles @@ -645,12 +717,12 @@ def harvest( candidates: Dict[str, str] = {} if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) @@ -675,7 +747,7 @@ def harvest( # logrotate.d entries) can still be attributed back to this service. service_role_aliases[role] = set(hints) | set(pkgs) | {role} - for sp in _maybe_add_specific_paths(hints): + for sp in _maybe_add_specific_paths(hints, backend): if not os.path.exists(sp): continue if sp in etc_owner_map: @@ -684,31 +756,13 @@ def harvest( candidates.setdefault(sp, "custom_specific_path") for pkg in sorted(pkgs): - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - # Only capture conffiles when they differ from the package default. - try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) # Capture custom/unowned files living under /etc/ for this service. # @@ -847,18 +901,18 @@ def harvest( # (useful when a timer triggers a service that isn't enabled). pkgs: Set[str] = set() if ti.fragment_path: - p = dpkg_owner(ti.fragment_path) + p = backend.owner_of_path(ti.fragment_path) if p: pkgs.add(p) if ti.trigger_unit and ti.trigger_unit.endswith(".service"): try: ui = get_unit_info(ti.trigger_unit) if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) except Exception: # nosec @@ -870,7 +924,7 @@ def harvest( # ------------------------- # Manually installed package roles # ------------------------- - manual_pkgs = list_manual_packages() + manual_pkgs = backend.list_manual_packages() # Avoid duplicate roles: if a manual package is already managed by any service role, skip its pkg_ role. 
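+    # (A service role already installs and configures those packages, so a
+    # second pkg_<name> role would manage the same files twice.)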
covered_by_services: Set[str] = set() for s in service_snaps: @@ -893,41 +947,26 @@ def harvest( for tpath in timer_extra_by_pkg.get(pkg, []): candidates.setdefault(tpath, "related_timer") - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) topdirs = _topdirs_for_package(pkg, pkg_to_etc_paths) roots: List[str] = [] + # Collect candidate directories plus backend-specific common files. for td in sorted(topdirs): if td in SHARED_ETC_TOPDIRS: continue + if backend.is_pkg_config_path(f"/etc/{td}/") or backend.is_pkg_config_path( + f"/etc/{td}" + ): + continue roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) - roots.extend([f"/etc/default/{td}"]) - roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/sysctl.d/{td}.conf"]) + roots.extend(_maybe_add_specific_paths(set(topdirs), backend)) # Capture any custom/unowned files under /etc/ for this # manually-installed package. This may include runtime-generated @@ -1031,26 +1070,48 @@ def harvest( ) # ------------------------- - # apt_config role (APT configuration and keyrings) + # Package manager config role + # - Debian: apt_config + # - Fedora/RHEL-like: dnf_config # ------------------------- apt_notes: List[str] = [] apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] - apt_role_name = "apt_config" - apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + dnf_notes: List[str] = [] + dnf_excluded: List[ExcludedFile] = [] + dnf_managed: List[ManagedFile] = [] - for path, reason in _iter_apt_capture_paths(): - _capture_file( - bundle_dir=bundle_dir, - role_name=apt_role_name, - abs_path=path, - reason=reason, - policy=policy, - path_filter=path_filter, - managed_out=apt_managed, - excluded_out=apt_excluded, - seen_role=apt_role_seen, - ) + apt_role_name = "apt_config" + dnf_role_name = "dnf_config" + + if backend.name == "dpkg": + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + for path, reason in _iter_apt_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, + ) + elif backend.name == "rpm": + dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) + for path, reason in _iter_dnf_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=dnf_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=dnf_managed, + excluded_out=dnf_excluded, + seen_role=dnf_role_seen, + ) apt_config_snapshot = AptConfigSnapshot( role_name=apt_role_name, @@ -1058,6 +1119,12 @@ def harvest( excluded=apt_excluded, notes=apt_notes, ) + dnf_config_snapshot = DnfConfigSnapshot( + role_name=dnf_role_name, + managed_files=dnf_managed, + 
excluded=dnf_excluded, + notes=dnf_notes, + ) # ------------------------- # etc_custom role (unowned /etc files not already attributed elsewhere) @@ -1079,6 +1146,8 @@ def harvest( already.add(mf.path) for mf in apt_managed: already.add(mf.path) + for mf in dnf_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1093,7 +1162,7 @@ def harvest( for pkg in s.packages: pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) - # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Alias -> role mapping used as a fallback when package ownership is missing. # Prefer service roles over package roles when both would match. alias_ranked: Dict[str, tuple[int, str]] = {} @@ -1124,8 +1193,8 @@ def harvest( per service. Resolution order: - 1) dpkg owner -> service role (if any service references the package) - 2) dpkg owner -> package role (manual package role exists) + 1) package owner -> service role (if any service references the package) + 2) package owner -> package role (manual package role exists) 3) basename/stem alias match -> preferred role """ if path.startswith("/etc/logrotate.d/"): @@ -1147,7 +1216,7 @@ def harvest( seen.add(c) uniq.append(c) - pkg = dpkg_owner(path) + pkg = backend.owner_of_path(path) if pkg: svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: @@ -1226,7 +1295,7 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue if path in already: continue @@ -1413,13 +1482,22 @@ def harvest( ) state = { - "host": {"hostname": os.uname().nodename, "os": "debian"}, + "enroll": { + "version": get_enroll_version(), + }, + "host": { + "hostname": os.uname().nodename, + "os": platform.os_family, + "pkg_backend": backend.name, + "os_release": platform.os_release, + }, "users": asdict(users_snapshot), "services": [asdict(s) for s in service_snaps], "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/ignore.py b/enroll/ignore.py index ab2cb96..904997f 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -43,6 +43,7 @@ DEFAULT_ALLOW_BINARY_GLOBS = [ "/usr/share/keyrings/*.gpg", "/usr/share/keyrings/*.pgp", "/usr/share/keyrings/*.asc", + "/etc/pki/rpm-gpg/*", ] SENSITIVE_CONTENT_PATTERNS = [ diff --git a/enroll/manifest.py b/enroll/manifest.py index dbc2353..923040f 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -166,6 +166,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", "- name: Apply all roles on all hosts", + " gather_facts: true", " hosts: all", " become: true", " roles:", @@ -181,6 +182,7 @@ def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: "---", f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", + " gather_facts: true", " become: true", " roles:", ] @@ -468,6 +470,51 @@ def _render_generic_files_tasks( """ +def _render_install_packages_tasks(role: str, var_prefix: str) -> str: + """Render cross-distro package installation tasks. 
+ + We generate conditional tasks for apt/dnf/yum, falling back to the + generic `package` module. This keeps generated roles usable on both + Debian-like and RPM-like systems. + """ + return f"""# Generated by enroll + +- name: Install packages for {role} (APT) + ansible.builtin.apt: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + update_cache: true + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'apt' + +- name: Install packages for {role} (DNF5) + ansible.builtin.dnf5: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'dnf5' + +- name: Install packages for {role} (DNF/YUM) + ansible.builtin.dnf: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') in ['dnf', 'yum'] + +- name: Install packages for {role} (generic fallback) + ansible.builtin.package: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') not in ['apt', 'dnf', 'dnf5', 'yum'] + +""" + + def _prepare_bundle_dir( bundle: str, *, @@ -629,6 +676,7 @@ def _manifest_from_bundle_dir( package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -664,6 +712,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_apt_config_roles: List[str] = [] + manifested_dnf_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] manifested_extra_paths_roles: List[str] = [] @@ -1041,6 +1090,157 @@ APT configuration harvested from the system (sources, pinning, and keyrings). 
manifested_apt_config_roles.append(role) + # ------------------------- + # dnf_config role (DNF/YUM repos, config, and RPM GPG keys) + # ------------------------- + if dnf_config_snapshot and dnf_config_snapshot.get("managed_files"): + role = dnf_config_snapshot.get("role_name", "dnf_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = dnf_config_snapshot.get("managed_files", []) + excluded = dnf_config_snapshot.get("excluded", []) + notes = dnf_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + # README: summarise repos and GPG key material + repo_paths: List[str] = [] + key_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + file_url_re = re.compile(r"file://(/[^\s]+)") + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p.startswith("/etc/yum.repos.d/") and p.endswith(".repo"): + repo_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as rf: + for line in rf: + s = line.strip() + if not s or s.startswith("#") or s.startswith(";"): + continue + # Collect hostnames from URLs (baseurl, mirrorlist, metalink, gpgkey...) + for m in url_re.finditer(s): + repo_hosts.add(m.group(1)) + # Collect local gpgkey file paths referenced as file:///... + for m in file_url_re.finditer(s): + key_paths.append(m.group(1)) + except OSError: + pass # nosec + + if p.startswith("/etc/pki/rpm-gpg/"): + key_paths.append(p) + + repo_paths = sorted(set(repo_paths)) + key_paths = sorted(set(key_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# dnf_config + +DNF/YUM configuration harvested from the system (repos, config files, and RPM GPG keys). 
+ +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Repo files +""" + + ("\n".join([f"- {p}" for p in repo_paths]) or "- (none)") + + """\n +## GPG keys +""" + + ("\n".join([f"- {p}" for p in key_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_dnf_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1457,19 +1657,7 @@ User-requested extra file harvesting. f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=True) @@ -1616,19 +1804,7 @@ Generated from `{unit}`. f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=False) ) @@ -1667,6 +1843,7 @@ Generated for package `{pkg}`. manifested_pkg_roles.append(role) all_roles = ( manifested_apt_config_roles + + manifested_dnf_config_roles + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles diff --git a/enroll/platform.py b/enroll/platform.py new file mode 100644 index 0000000..998b83d --- /dev/null +++ b/enroll/platform.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import shutil +from dataclasses import dataclass +from typing import Dict, List, Optional, Set, Tuple + +from .fsutil import file_md5 + + +def _read_os_release(path: str = "/etc/os-release") -> Dict[str, str]: + out: Dict[str, str] = {} + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"') + out[k] = v + except OSError: + return {} + return out + + +@dataclass +class PlatformInfo: + os_family: str # debian|redhat|unknown + pkg_backend: str # dpkg|rpm|unknown + os_release: Dict[str, str] + + +def detect_platform() -> PlatformInfo: + """Detect platform family and package backend. + + Uses /etc/os-release when available, with a conservative fallback to + checking for dpkg/rpm binaries. 
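+
+    For example, ID=fedora or ID_LIKE="rhel centos" selects the rpm
+    backend, while ID=ubuntu or ID_LIKE=debian selects dpkg.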
+ """ + + osr = _read_os_release() + os_id = (osr.get("ID") or "").strip().lower() + likes = (osr.get("ID_LIKE") or "").strip().lower().split() + + deb_ids = {"debian", "ubuntu", "linuxmint", "raspbian", "kali"} + rhel_ids = { + "fedora", + "rhel", + "centos", + "rocky", + "almalinux", + "ol", + "oracle", + "scientific", + } + + if os_id in deb_ids or "debian" in likes: + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if os_id in rhel_ids or any( + x in likes for x in ("rhel", "fedora", "centos", "redhat") + ): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + + # Fallback heuristics. + if shutil.which("dpkg"): + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if shutil.which("rpm"): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + return PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release=osr) + + +class PackageBackend: + """Backend abstraction for package ownership, config detection, and manual package lists.""" + + name: str + pkg_config_prefixes: Tuple[str, ...] + + def owner_of_path(self, path: str) -> Optional[str]: # pragma: no cover + raise NotImplementedError + + def list_manual_packages(self) -> List[str]: # pragma: no cover + raise NotImplementedError + + def build_etc_index( + self, + ) -> Tuple[ + Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]] + ]: # pragma: no cover + raise NotImplementedError + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + """Return a mapping of modified file paths -> reason label.""" + return {} + + +class DpkgBackend(PackageBackend): + name = "dpkg" + pkg_config_prefixes = ("/etc/apt/",) + + def __init__(self) -> None: + from .debian import parse_status_conffiles + + self._conffiles_by_pkg = parse_status_conffiles() + + def owner_of_path(self, path: str) -> Optional[str]: + from .debian import dpkg_owner + + return dpkg_owner(path) + + def list_manual_packages(self) -> List[str]: + from .debian import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .debian import build_dpkg_etc_index + + return build_dpkg_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + from .debian import read_pkg_md5sums + + out: Dict[str, str] = {} + conff = self._conffiles_by_pkg.get(pkg, {}) + md5sums = read_pkg_md5sums(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path in conff: + try: + current = file_md5(path) + except OSError: + continue + if current != conff[path]: + out[path] = "modified_conffile" + continue + + rel = path.lstrip("/") + baseline = md5sums.get(rel) + if baseline: + try: + current = file_md5(path) + except OSError: + continue + if current != baseline: + out[path] = "modified_packaged_file" + return out + + +class RpmBackend(PackageBackend): + name = "rpm" + pkg_config_prefixes = ( + "/etc/dnf/", + "/etc/yum/", + "/etc/yum.repos.d/", + 
"/etc/yum.conf", + ) + + def __init__(self) -> None: + self._modified_cache: Dict[str, Set[str]] = {} + self._config_cache: Dict[str, Set[str]] = {} + + def owner_of_path(self, path: str) -> Optional[str]: + from .rpm import rpm_owner + + return rpm_owner(path) + + def list_manual_packages(self) -> List[str]: + from .rpm import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .rpm import build_rpm_etc_index + + return build_rpm_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/sysconfig/{h}", + f"/etc/sysconfig/{h}.conf", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def _config_files(self, pkg: str) -> Set[str]: + if pkg in self._config_cache: + return self._config_cache[pkg] + from .rpm import rpm_config_files + + s = rpm_config_files(pkg) + self._config_cache[pkg] = s + return s + + def _modified_files(self, pkg: str) -> Set[str]: + if pkg in self._modified_cache: + return self._modified_cache[pkg] + from .rpm import rpm_modified_files + + s = rpm_modified_files(pkg) + self._modified_cache[pkg] = s + return s + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + out: Dict[str, str] = {} + modified = self._modified_files(pkg) + if not modified: + return out + config = self._config_files(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path not in modified: + continue + out[path] = ( + "modified_conffile" if path in config else "modified_packaged_file" + ) + return out + + +def get_backend(info: Optional[PlatformInfo] = None) -> PackageBackend: + info = info or detect_platform() + if info.pkg_backend == "dpkg": + return DpkgBackend() + if info.pkg_backend == "rpm": + return RpmBackend() + # Unknown: be conservative and use an rpm backend if rpm exists, otherwise dpkg. + if shutil.which("rpm"): + return RpmBackend() + return DpkgBackend() diff --git a/enroll/rpm.py b/enroll/rpm.py new file mode 100644 index 0000000..947617c --- /dev/null +++ b/enroll/rpm.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import os +import re +import shutil +import subprocess # nosec +from typing import Dict, List, Optional, Set, Tuple + + +def _run( + cmd: list[str], *, allow_fail: bool = False, merge_err: bool = False +) -> tuple[int, str]: + """Run a command and return (rc, stdout). + + If merge_err is True, stderr is merged into stdout to preserve ordering. + """ + p = subprocess.run( + cmd, + check=False, + text=True, + stdout=subprocess.PIPE, + stderr=(subprocess.STDOUT if merge_err else subprocess.PIPE), + ) # nosec + out = p.stdout or "" + if (not allow_fail) and p.returncode != 0: + err = "" if merge_err else (p.stderr or "") + raise RuntimeError(f"Command failed: {cmd}\n{err}{out}") + return p.returncode, out + + +def rpm_owner(path: str) -> Optional[str]: + """Return owning package name for a path, or None if unowned.""" + if not path: + return None + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", path], allow_fail=True, merge_err=True + ) + if rc != 0: + return None + for line in out.splitlines(): + line = line.strip() + if not line: + continue + if "is not owned" in line: + return None + # With --qf we expect just the package name. + if re.match(r"^[A-Za-z0-9_.+:-]+$", line): + # Strip any accidental epoch/name-version-release output. 
+ return line.split(":", 1)[-1].strip() if line else None + return None + + +_ARCH_SUFFIXES = { + "noarch", + "x86_64", + "i686", + "aarch64", + "armv7hl", + "ppc64le", + "s390x", + "riscv64", +} + + +def _strip_arch(token: str) -> str: + """Strip a trailing .ARCH from a yum/dnf package token.""" + t = token.strip() + if "." not in t: + return t + head, tail = t.rsplit(".", 1) + if tail in _ARCH_SUFFIXES: + return head + return t + + +def list_manual_packages() -> List[str]: + """Return packages considered "user-installed" on RPM-based systems. + + Best-effort: + 1) dnf repoquery --userinstalled + 2) dnf history userinstalled + 3) yum history userinstalled + + If none are available, returns an empty list. + """ + + def _dedupe(pkgs: List[str]) -> List[str]: + return sorted({p for p in (pkgs or []) if p}) + + if shutil.which("dnf"): + # Prefer a machine-friendly output. + for cmd in ( + ["dnf", "-q", "repoquery", "--userinstalled", "--qf", "%{name}\n"], + ["dnf", "-q", "repoquery", "--userinstalled"], + ): + rc, out = _run(cmd, allow_fail=True, merge_err=True) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Loaded plugins"): + continue + pkgs.append(_strip_arch(line.split()[0])) + if pkgs: + return _dedupe(pkgs) + + # Fallback: human-oriented output. + rc, out = _run( + ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Installed") or line.startswith("Last"): + continue + # Often: "vim-enhanced.x86_64" + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + if shutil.which("yum"): + rc, out = _run( + ["yum", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if ( + not line + or line.startswith("Installed") + or line.startswith("Loaded") + ): + continue + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + return [] + + +def _walk_etc_files() -> List[str]: + out: List[str] = [] + for dirpath, _, filenames in os.walk("/etc"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append(p) + return out + + +def build_rpm_etc_index() -> ( + Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]] +): + """Best-effort equivalent of build_dpkg_etc_index for RPM systems. + + This builds indexes by walking the live /etc tree and querying RPM ownership + for each file. + + Returns: + owned_etc_paths: set of /etc paths owned by rpm + etc_owner_map: /etc/path -> pkg + topdir_to_pkgs: "nginx" -> {"nginx", ...} based on /etc//... + pkg_to_etc_paths: pkg -> list of owned /etc paths + """ + + owned: Set[str] = set() + owner: Dict[str, str] = {} + topdir_to_pkgs: Dict[str, Set[str]] = {} + pkg_to_etc: Dict[str, List[str]] = {} + + paths = _walk_etc_files() + + # Query in chunks to avoid excessive process spawns. 
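+    # 250 is a heuristic batch size: large enough to amortise rpm startup
+    # cost, small enough to stay well under typical argv length limits.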
+ chunk_size = 250 + + not_owned_re = re.compile( + r"^file\s+(?P.+?)\s+is\s+not\s+owned\s+by\s+any\s+package", re.IGNORECASE + ) + + for i in range(0, len(paths), chunk_size): + chunk = paths[i : i + chunk_size] + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", *chunk], + allow_fail=True, + merge_err=True, + ) + + lines = [ln.strip() for ln in out.splitlines() if ln.strip()] + # Heuristic: rpm prints one output line per input path. If that isn't + # true (warnings/errors), fall back to per-file queries for this chunk. + if len(lines) != len(chunk): + for p in chunk: + pkg = rpm_owner(p) + if not pkg: + continue + owned.add(p) + owner.setdefault(p, pkg) + pkg_to_etc.setdefault(pkg, []).append(p) + parts = p.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + continue + + for pth, line in zip(chunk, lines): + if not line: + continue + if not_owned_re.match(line) or "is not owned" in line: + continue + pkg = line.split()[0].strip() + if not pkg: + continue + owned.add(pth) + owner.setdefault(pth, pkg) + pkg_to_etc.setdefault(pkg, []).append(pth) + parts = pth.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + + for k, v in list(pkg_to_etc.items()): + pkg_to_etc[k] = sorted(set(v)) + + return owned, owner, topdir_to_pkgs, pkg_to_etc + + +def rpm_config_files(pkg: str) -> Set[str]: + """Return config files for a package (rpm -qc).""" + rc, out = _run(["rpm", "-qc", pkg], allow_fail=True, merge_err=True) + if rc != 0: + return set() + files: Set[str] = set() + for line in out.splitlines(): + line = line.strip() + if line.startswith("/"): + files.add(line) + return files + + +def rpm_modified_files(pkg: str) -> Set[str]: + """Return files reported as modified by rpm verification (rpm -V). + + rpm -V only prints lines for differences/missing files. + """ + rc, out = _run(["rpm", "-V", pkg], allow_fail=True, merge_err=True) + # rc is non-zero when there are differences; we still want the output. + files: Set[str] = set() + for raw in out.splitlines(): + line = raw.strip() + if not line: + continue + # Typical forms: + # S.5....T. c /etc/foo.conf + # missing /etc/bar + m = re.search(r"\s(/\S+)$", line) + if m: + files.add(m.group(1)) + continue + if line.startswith("missing"): + parts = line.split() + if parts and parts[-1].startswith("/"): + files.add(parts[-1]) + return files diff --git a/tests/test_debian.py b/tests/test_debian.py index 333afc1..abad361 100644 --- a/tests/test_debian.py +++ b/tests/test_debian.py @@ -1,6 +1,5 @@ from __future__ import annotations -import hashlib from pathlib import Path @@ -97,58 +96,3 @@ def test_parse_status_conffiles_handles_continuations(tmp_path: Path): assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" assert m["nginx"]["/etc/nginx/mime.types"] == "123456" assert "other" not in m - - -def test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): - import enroll.debian as d - - # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. 
- md5_file = tmp_path / "pkg.md5sums" - md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") - - def fake_exists(path: str) -> bool: - return path.endswith("/var/lib/dpkg/info/p1.md5sums") - - real_open = open - - def fake_open(path: str, *args, **kwargs): - if path.endswith("/var/lib/dpkg/info/p1.md5sums"): - return real_open(md5_file, *args, **kwargs) - return real_open(path, *args, **kwargs) - - monkeypatch.setattr(d.os.path, "exists", fake_exists) - monkeypatch.setattr("builtins.open", fake_open) - - m = d.read_pkg_md5sums("p1") - assert m == {"etc/foo.conf": "0123456789abcdef"} - - content = b"hello world\n" - p = tmp_path / "x" - p.write_bytes(content) - assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() - - -def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): - import enroll.debian as d - import sys - - p = tmp_path / "f" - p.write_text("x", encoding="utf-8") - - class FakePwdMod: - @staticmethod - def getpwuid(_): # pragma: no cover - raise KeyError - - class FakeGrpMod: - @staticmethod - def getgrgid(_): # pragma: no cover - raise KeyError - - # stat_triplet imports pwd/grp inside the function, so patch sys.modules. - monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) - monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) - owner, group, mode = d.stat_triplet(str(p)) - assert owner.isdigit() - assert group.isdigit() - assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_fsutil.py b/tests/test_fsutil.py new file mode 100644 index 0000000..ebe2224 --- /dev/null +++ b/tests/test_fsutil.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import hashlib +import os +from pathlib import Path + +from enroll.fsutil import file_md5, stat_triplet + + +def test_file_md5_matches_hashlib(tmp_path: Path): + p = tmp_path / "x" + p.write_bytes(b"hello world") + expected = hashlib.md5(b"hello world").hexdigest() # nosec + assert file_md5(str(p)) == expected + + +def test_stat_triplet_reports_mode(tmp_path: Path): + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + os.chmod(p, 0o600) + + owner, group, mode = stat_triplet(str(p)) + assert mode == "0600" + assert owner # non-empty string + assert group # non-empty string diff --git a/tests/test_harvest.py b/tests/test_harvest.py index fa796f0..a0d22ec 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -2,6 +2,7 @@ import json from pathlib import Path import enroll.harvest as h +from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo @@ -10,6 +11,64 @@ class AllowAllPolicy: return None +class FakeBackend: + """Minimal backend stub for harvest tests. + + The real backends (dpkg/rpm) enumerate the live system (dpkg status, rpm + databases, etc). These tests instead control all backend behaviour. + """ + + def __init__( + self, + *, + name: str, + owned_etc: set[str], + etc_owner_map: dict[str, str], + topdir_to_pkgs: dict[str, set[str]], + pkg_to_etc_paths: dict[str, list[str]], + manual_pkgs: list[str], + owner_fn, + modified_by_pkg: dict[str, dict[str, str]] | None = None, + pkg_config_prefixes: tuple[str, ...] 
= ("/etc/apt/",), + ): + self.name = name + self.pkg_config_prefixes = pkg_config_prefixes + self._owned_etc = owned_etc + self._etc_owner_map = etc_owner_map + self._topdir_to_pkgs = topdir_to_pkgs + self._pkg_to_etc_paths = pkg_to_etc_paths + self._manual = manual_pkgs + self._owner_fn = owner_fn + self._modified_by_pkg = modified_by_pkg or {} + + def build_etc_index(self): + return ( + self._owned_etc, + self._etc_owner_map, + self._topdir_to_pkgs, + self._pkg_to_etc_paths, + ) + + def owner_of_path(self, path: str): + return self._owner_fn(path) + + def list_manual_packages(self): + return list(self._manual) + + def specific_paths_for_hints(self, hints: set[str]): + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: list[str]): + # Test-controlled; ignore etc_paths. + return dict(self._modified_by_pkg.get(pkg, {})) + + def test_harvest_dedup_manual_packages_and_builds_etc_custom( monkeypatch, tmp_path: Path ): @@ -22,7 +81,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( real_exists = os.path.exists real_islink = os.path.islink - # Fake filesystem: two /etc files exist, only one is dpkg-owned. + # Fake filesystem: two /etc files exist, only one is package-owned. # Also include some /usr/local files to populate usr_local_custom. files = { "/etc/openvpn/server.conf": b"server", @@ -93,6 +152,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( # Avoid real system access monkeypatch.setattr(h, "list_enabled_services", lambda: ["openvpn.service"]) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) monkeypatch.setattr( h, "get_unit_info", @@ -109,29 +169,30 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( ), ) - # Debian package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. - def fake_build_index(): - owned_etc = {"/etc/openvpn/server.conf"} - etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} - topdir_to_pkgs = {"openvpn": {"openvpn"}} - pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - return owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths + # Package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. 
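+    # These maps feed the FakeBackend below, which replaces the old
+    # per-function monkeypatches (build_dpkg_etc_index, dpkg_owner, ...).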
+ owned_etc = {"/etc/openvpn/server.conf"} + etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} + topdir_to_pkgs = {"openvpn": {"openvpn"}} + pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - monkeypatch.setattr(h, "build_dpkg_etc_index", fake_build_index) - - # openvpn conffile hash mismatch => should be captured under service role - monkeypatch.setattr( - h, - "parse_status_conffiles", - lambda: {"openvpn": {"/etc/openvpn/server.conf": "old"}}, + backend = FakeBackend( + name="dpkg", + owned_etc=owned_etc, + etc_owner_map=etc_owner_map, + topdir_to_pkgs=topdir_to_pkgs, + pkg_to_etc_paths=pkg_to_etc_paths, + manual_pkgs=["openvpn", "curl"], + owner_fn=lambda p: "openvpn" if "openvpn" in (p or "") else None, + modified_by_pkg={ + "openvpn": {"/etc/openvpn/server.conf": "modified_conffile"}, + }, ) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "new") monkeypatch.setattr( - h, "dpkg_owner", lambda p: "openvpn" if "openvpn" in p else None + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) ) - monkeypatch.setattr(h, "list_manual_packages", lambda: ["openvpn", "curl"]) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_stat_triplet(p: str): @@ -207,6 +268,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr( h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] ) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) def fake_unit_info(unit: str) -> UnitInfo: if unit == "apparmor.service": @@ -235,31 +297,35 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "get_unit_info", fake_unit_info) - # Dpkg /etc index: no owned /etc paths needed for this test. - monkeypatch.setattr( - h, - "build_dpkg_etc_index", - lambda: (set(), {}, {}, {}), - ) - monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "x") - monkeypatch.setattr(h, "list_manual_packages", lambda: []) - monkeypatch.setattr(h, "collect_non_system_users", lambda: []) - # Make apparmor *also* claim the ntpsec package (simulates overly-broad # package inference). The snippet routing should still prefer role 'ntpsec'. 
- def fake_dpkg_owner(p: str): + def fake_owner(p: str): if p == "/etc/cron.d/ntpsec": return "ntpsec" - if "apparmor" in p: + if "apparmor" in (p or ""): return "ntpsec" # intentionally misleading - if "ntpsec" in p or "ntpd" in p: + if "ntpsec" in (p or "") or "ntpd" in (p or ""): return "ntpsec" return None - monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + backend = FakeBackend( + name="dpkg", + owned_etc=set(), + etc_owner_map={}, + topdir_to_pkgs={}, + pkg_to_etc_paths={}, + manual_pkgs=[], + owner_fn=fake_owner, + modified_by_pkg={}, + ) + + monkeypatch.setattr( + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) + ) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): dst = Path(bundle_dir) / "artifacts" / role_name / src_rel @@ -268,11 +334,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) - class AllowAll: - def deny_reason(self, path: str): - return None - - state_path = h.harvest(str(bundle), policy=AllowAll()) + state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 92c3dfc..cbfc208 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -322,3 +322,96 @@ def test_copy2_replace_overwrites_readonly_destination(tmp_path: Path): assert dst.read_text(encoding="utf-8") == "new" mode = stat.S_IMODE(dst.stat().st_mode) assert mode & stat.S_IWUSR # destination should remain mergeable + + +def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + # Create a dnf_config artifact. 
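+    # manifest() only emits the role when state lists managed_files, so a
+    # single dnf.conf artifact is enough for this test.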
+ (bundle / "artifacts" / "dnf_config" / "etc" / "dnf").mkdir( + parents=True, exist_ok=True + ) + (bundle / "artifacts" / "dnf_config" / "etc" / "dnf" / "dnf.conf").write_text( + "[main]\n", encoding="utf-8" + ) + + state = { + "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "package_roles": [], + "manual_packages": [], + "manual_packages_skipped": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + "src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + } + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + manifest(str(bundle), str(out)) + + pb = (out / "playbook.yml").read_text(encoding="utf-8") + assert "- dnf_config" in pb + + tasks = (out / "roles" / "dnf_config" / "tasks" / "main.yml").read_text( + encoding="utf-8" + ) + # Ensure the role exists and contains some file deployment logic. + assert "Deploy any other managed files" in tasks + + +def test_render_install_packages_tasks_contains_dnf_branch(): + from enroll.manifest import _render_install_packages_tasks + + txt = _render_install_packages_tasks("role", "role") + assert "ansible.builtin.apt" in txt + assert "ansible.builtin.dnf" in txt + assert "ansible.builtin.package" in txt + assert "pkg_mgr" in txt diff --git a/tests/test_platform.py b/tests/test_platform.py new file mode 100644 index 0000000..7ff66c6 --- /dev/null +++ b/tests/test_platform.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +import enroll.platform as platform + + +def test_read_os_release_parses_kv_and_strips_quotes(tmp_path: Path): + p = tmp_path / "os-release" + p.write_text( + """ +# comment +ID=fedora +ID_LIKE=\"rhel centos\" +NAME=\"Fedora Linux\" +EMPTY= +NOEQUALS +""", + encoding="utf-8", + ) + + osr = platform._read_os_release(str(p)) + assert osr["ID"] == "fedora" + assert osr["ID_LIKE"] == "rhel centos" + assert osr["NAME"] == "Fedora Linux" + assert osr["EMPTY"] == "" + assert "NOEQUALS" not in osr + + +def test_detect_platform_prefers_os_release(monkeypatch): + monkeypatch.setattr( + platform, + "_read_os_release", + lambda path="/etc/os-release": {"ID": "fedora", "ID_LIKE": "rhel"}, + ) + # If os-release is decisive we shouldn't need which() + monkeypatch.setattr(platform.shutil, "which", lambda exe: None) + + info = platform.detect_platform() + assert info.os_family == "redhat" + assert info.pkg_backend == "rpm" + + +def test_detect_platform_fallbacks_to_dpkg_when_unknown(monkeypatch): + monkeypatch.setattr(platform, "_read_os_release", lambda path="/etc/os-release": {}) + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/dpkg" if exe == "dpkg" else None + ) + + info = platform.detect_platform() + assert 
info.os_family == "debian" + assert info.pkg_backend == "dpkg" + + +def test_get_backend_unknown_prefers_rpm_if_present(monkeypatch): + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/rpm" if exe == "rpm" else None + ) + + b = platform.get_backend( + platform.PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release={}) + ) + assert isinstance(b, platform.RpmBackend) + + +def test_rpm_backend_modified_paths_labels_conffiles(monkeypatch): + b = platform.RpmBackend() + + # Pretend rpm -V says both files changed, but only one is a config file. + monkeypatch.setattr(b, "_modified_files", lambda pkg: {"/etc/foo.conf", "/etc/bar"}) + monkeypatch.setattr(b, "_config_files", lambda pkg: {"/etc/foo.conf"}) + + out = b.modified_paths("mypkg", ["/etc/foo.conf", "/etc/bar", "/etc/dnf/dnf.conf"]) + assert out["/etc/foo.conf"] == "modified_conffile" + assert out["/etc/bar"] == "modified_packaged_file" + # Package-manager config paths are excluded. + assert "/etc/dnf/dnf.conf" not in out + + +def test_specific_paths_for_hints_differs_between_backends(): + # We can exercise this without instantiating DpkgBackend (which reads dpkg status) + class Dummy(platform.PackageBackend): + name = "dummy" + pkg_config_prefixes = ("/etc/apt/",) + + d = Dummy() + assert d.is_pkg_config_path("/etc/apt/sources.list") + assert not d.is_pkg_config_path("/etc/ssh/sshd_config") + + r = platform.RpmBackend() + paths = set(r.specific_paths_for_hints({"nginx"})) + assert "/etc/sysconfig/nginx" in paths + assert "/etc/sysconfig/nginx.conf" in paths diff --git a/tests/test_rpm.py b/tests/test_rpm.py new file mode 100644 index 0000000..ea97c12 --- /dev/null +++ b/tests/test_rpm.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import enroll.rpm as rpm + + +def test_rpm_owner_returns_none_when_unowned(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 1, + "file /etc/x is not owned by any package\n", + ), + ) + assert rpm.rpm_owner("/etc/x") is None + + +def test_rpm_owner_parses_name(monkeypatch): + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (0, "bash\n") + ) + assert rpm.rpm_owner("/bin/bash") == "bash" + + +def test_strip_arch_strips_known_arches(): + assert rpm._strip_arch("vim-enhanced.x86_64") == "vim-enhanced" + assert rpm._strip_arch("foo.noarch") == "foo" + assert rpm._strip_arch("weird.token") == "weird.token" + + +def test_list_manual_packages_prefers_dnf_repoquery(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # First repoquery form returns usable output. 
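+        # The output deliberately repeats vim-enhanced.x86_64: the caller
+        # must dedupe and strip the arch suffix.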
+ if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 0, "vim-enhanced.x86_64\nhtop\nvim-enhanced.x86_64\n" + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["htop", "vim-enhanced"] + + +def test_list_manual_packages_falls_back_to_history(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # repoquery fails + if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 1, "" + if cmd[:3] == ["dnf", "-q", "history"]: + return ( + 0, + "Installed Packages\nvim-enhanced.x86_64\nLast metadata expiration check: 0:01:00 ago\n", + ) + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["vim-enhanced"] + + +def test_build_rpm_etc_index_uses_fallback_when_rpm_output_mismatches(monkeypatch): + # Two files in /etc, one owned, one unowned. + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/owned.conf", "/etc/unowned.conf"] + ) + + # Simulate chunk query producing unexpected extra line (mismatch) -> triggers per-file fallback. + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: (0, "ownedpkg\nEXTRA\nTHIRD\n"), + ) + monkeypatch.setattr( + rpm, "rpm_owner", lambda p: "ownedpkg" if p == "/etc/owned.conf" else None + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert owned == {"/etc/owned.conf"} + assert owner_map["/etc/owned.conf"] == "ownedpkg" + assert "owned.conf" in topdir_to_pkgs + assert pkg_to_etc["ownedpkg"] == ["/etc/owned.conf"] + + +def test_build_rpm_etc_index_parses_chunk_output(monkeypatch): + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/ssh/sshd_config", "/etc/notowned"] + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # One output line per input path. + return 0, "openssh-server\nfile /etc/notowned is not owned by any package\n" + + monkeypatch.setattr(rpm, "_run", fake_run) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert "/etc/ssh/sshd_config" in owned + assert "/etc/notowned" not in owned + assert owner_map["/etc/ssh/sshd_config"] == "openssh-server" + assert "ssh" in topdir_to_pkgs + assert "openssh-server" in topdir_to_pkgs["ssh"] + assert pkg_to_etc["openssh-server"] == ["/etc/ssh/sshd_config"] + + +def test_rpm_config_files_and_modified_files_parsing(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 0, + "/etc/foo.conf\n/usr/bin/tool\n", + ), + ) + assert rpm.rpm_config_files("mypkg") == {"/etc/foo.conf", "/usr/bin/tool"} + + # rpm -V returns only changed/missing files + out = "S.5....T. 
c /etc/foo.conf\nmissing /etc/bar\n" + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (1, out) + ) + assert rpm.rpm_modified_files("mypkg") == {"/etc/foo.conf", "/etc/bar"} From 043802e80034b998c84a711e77ae8c69c8e0d137 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:10:27 +1100 Subject: [PATCH 18/69] Refactor state structure and capture versions of packages --- enroll/debian.py | 44 ++++++++++++++++++ enroll/diff.py | 112 +++++++++++++++++++++++++++++++++++++-------- enroll/harvest.py | 75 ++++++++++++++++++++++++++---- enroll/manifest.py | 27 ++++++----- enroll/platform.py | 21 +++++++++ enroll/rpm.py | 57 +++++++++++++++++++++++ 6 files changed, 294 insertions(+), 42 deletions(-) diff --git a/enroll/debian.py b/enroll/debian.py index 7e1ee2d..9bf847e 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -63,6 +63,50 @@ def list_manual_packages() -> List[str]: return sorted(set(pkgs)) +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses dpkg-query and is expected to work on Debian/Ubuntu-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + """ + + try: + p = subprocess.run( + [ + "dpkg-query", + "-W", + "-f=${Package}\t${Version}\t${Architecture}\n", + ], + text=True, + capture_output=True, + check=False, + ) # nosec + except Exception: + return {} + + out: Dict[str, List[Dict[str, str]]] = {} + for raw in (p.stdout or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 3: + continue + name, ver, arch = parts[0].strip(), parts[1].strip(), parts[2].strip() + if not name: + continue + out.setdefault(name, []).append({"version": ver, "arch": arch}) + + # Stable ordering for deterministic JSON dumps. 
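+    # Multi-arch packages (e.g. the same libfoo installed for both amd64
+    # and i386) yield several instances per name; sort by (arch, version).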
+ for k in list(out.keys()): + out[k] = sorted( + out[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return out + + def build_dpkg_etc_index( info_dir: str = "/var/lib/dpkg/info", ) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]: diff --git a/enroll/diff.py b/enroll/diff.py index 0110d17..5ad0eac 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -126,18 +126,62 @@ def _load_state(bundle_dir: Path) -> Dict[str, Any]: return json.load(f) +def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]: + return (state.get("inventory") or {}).get("packages") or {} + + def _all_packages(state: Dict[str, Any]) -> List[str]: - pkgs = set(state.get("manual_packages", []) or []) - pkgs |= set(state.get("manual_packages_skipped", []) or []) - for s in state.get("services", []) or []: - for p in s.get("packages", []) or []: - pkgs.add(p) - return sorted(pkgs) + return sorted(_packages_inventory(state).keys()) + + +def _roles(state: Dict[str, Any]) -> Dict[str, Any]: + return state.get("roles") or {} + + +def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]: + """Return a stable string used for version comparison.""" + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{arch}:{ver}" if arch else ver) + if parts: + return "|".join(sorted(parts)) + v = entry.get("version") + if v: + return str(v) + return None + + +def _pkg_version_display(entry: Dict[str, Any]) -> Optional[str]: + v = entry.get("version") + if v: + return str(v) + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{ver} ({arch})" if arch else ver) + if parts: + return ", ".join(sorted(parts)) + return None def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: out: Dict[str, Dict[str, Any]] = {} - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: unit = s.get("unit") if unit: out[str(unit)] = s @@ -145,7 +189,7 @@ def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - users = (state.get("users") or {}).get("users") or [] + users = (_roles(state).get("users") or {}).get("users") or [] out: Dict[str, Dict[str, Any]] = {} for u in users: name = u.get("name") @@ -167,43 +211,43 @@ class FileRec: def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: # Services - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: role = s.get("role_name") or "unknown" for mf in s.get("managed_files", []) or []: yield str(role), mf # Package roles - for p in state.get("package_roles", []) or []: + for p in _roles(state).get("packages") or []: role = p.get("role_name") or "unknown" for mf in p.get("managed_files", []) or []: yield str(role), mf # Users - u = state.get("users") or {} + u = _roles(state).get("users") or {} u_role = u.get("role_name") or "users" for mf in u.get("managed_files", []) or []: yield str(u_role), mf # apt_config - ac = state.get("apt_config") or {} + ac = 
_roles(state).get("apt_config") or {} ac_role = ac.get("role_name") or "apt_config" for mf in ac.get("managed_files", []) or []: yield str(ac_role), mf # etc_custom - ec = state.get("etc_custom") or {} + ec = _roles(state).get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf # usr_local_custom - ul = state.get("usr_local_custom") or {} + ul = _roles(state).get("usr_local_custom") or {} ul_role = ul.get("role_name") or "usr_local_custom" for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf # extra_paths - xp = state.get("extra_paths") or {} + xp = _roles(state).get("extra_paths") or {} xp_role = xp.get("role_name") or "extra_paths" for mf in xp.get("managed_files", []) or []: yield str(xp_role), mf @@ -261,12 +305,28 @@ def compare_harvests( old_state = _load_state(old_b.dir) new_state = _load_state(new_b.dir) - old_pkgs = set(_all_packages(old_state)) - new_pkgs = set(_all_packages(new_state)) + old_inv = _packages_inventory(old_state) + new_inv = _packages_inventory(new_state) + + old_pkgs = set(old_inv.keys()) + new_pkgs = set(new_inv.keys()) pkgs_added = sorted(new_pkgs - old_pkgs) pkgs_removed = sorted(old_pkgs - new_pkgs) + pkgs_version_changed: List[Dict[str, Any]] = [] + for pkg in sorted(old_pkgs & new_pkgs): + a = old_inv.get(pkg) or {} + b = new_inv.get(pkg) or {} + if _pkg_version_key(a) != _pkg_version_key(b): + pkgs_version_changed.append( + { + "package": pkg, + "old": _pkg_version_display(a), + "new": _pkg_version_display(b), + } + ) + old_units = _service_units(old_state) new_units = _service_units(new_state) units_added = sorted(set(new_units) - set(old_units)) @@ -380,6 +440,7 @@ def compare_harvests( [ pkgs_added, pkgs_removed, + pkgs_version_changed, units_added, units_removed, units_changed, @@ -413,7 +474,11 @@ def compare_harvests( "state_mtime": _mtime_iso(new_b.state_path), "host": (new_state.get("host") or {}).get("hostname"), }, - "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "packages": { + "added": pkgs_added, + "removed": pkgs_removed, + "version_changed": pkgs_version_changed, + }, "services": { "enabled_added": units_added, "enabled_removed": units_removed, @@ -471,10 +536,13 @@ def _report_text(report: Dict[str, Any]) -> str: lines.append("\nPackages") lines.append(f" added: {len(pk.get('added', []) or [])}") lines.append(f" removed: {len(pk.get('removed', []) or [])}") + lines.append(f" version_changed: {len(pk.get('version_changed', []) or [])}") for p in pk.get("added", []) or []: lines.append(f" + {p}") for p in pk.get("removed", []) or []: lines.append(f" - {p}") + for ch in pk.get("version_changed", []) or []: + lines.append(f" ~ {ch.get('package')}: {ch.get('old')} -> {ch.get('new')}") sv = report.get("services", {}) lines.append("\nServices (enabled systemd units)") @@ -542,6 +610,7 @@ def _report_text(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), @@ -578,6 +647,12 @@ def _report_markdown(report: Dict[str, Any]) -> str: for p in pk.get("removed", []) or []: out.append(f" - `- {p}`\n") + out.append(f"- Version changed: {len(pk.get('version_changed', []) or [])}\n") + for ch in pk.get("version_changed", []) or []: + out.append( + f" - `~ {ch.get('package')}`: `{ch.get('old')}` → `{ch.get('new')}`\n" + ) + sv = report.get("services", {}) out.append("## Services (enabled 
systemd units)\n") if sv.get("enabled_added"): @@ -672,6 +747,7 @@ def _report_markdown(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), diff --git a/enroll/harvest.py b/enroll/harvest.py index bb706b1..4ca3984 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -5,6 +5,7 @@ import json import os import re import shutil +import time from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set @@ -1481,9 +1482,60 @@ def harvest( notes=extra_notes, ) + # ------------------------- + # Inventory: packages (SBOM-ish) + # ------------------------- + installed = backend.installed_packages() or {} + + manual_set: Set[str] = set(manual_pkgs or []) + + pkg_units: Dict[str, Set[str]] = {} + pkg_roles_map: Dict[str, Set[str]] = {} + + for svc in service_snaps: + for p in svc.packages: + pkg_units.setdefault(p, set()).add(svc.unit) + pkg_roles_map.setdefault(p, set()).add(svc.role_name) + + pkg_role_names: Dict[str, List[str]] = {} + for ps in pkg_snaps: + pkg_roles_map.setdefault(ps.package, set()).add(ps.role_name) + pkg_role_names.setdefault(ps.package, []).append(ps.role_name) + + pkg_names: Set[str] = set() + pkg_names |= manual_set + pkg_names |= set(pkg_units.keys()) + pkg_names |= {ps.package for ps in pkg_snaps} + + packages_inventory: Dict[str, Dict[str, object]] = {} + for pkg in sorted(pkg_names): + installs = installed.get(pkg, []) or [] + arches = sorted({i.get("arch") for i in installs if i.get("arch")}) + vers = sorted({i.get("version") for i in installs if i.get("version")}) + version: Optional[str] = vers[0] if len(vers) == 1 else None + + observed: List[Dict[str, str]] = [] + if pkg in manual_set: + observed.append({"kind": "user_installed"}) + for unit in sorted(pkg_units.get(pkg, set())): + observed.append({"kind": "systemd_unit", "ref": unit}) + for rn in sorted(set(pkg_role_names.get(pkg, []))): + observed.append({"kind": "package_role", "ref": rn}) + + roles = sorted(pkg_roles_map.get(pkg, set())) + + packages_inventory[pkg] = { + "version": version, + "arches": arches, + "installations": installs, + "observed_via": observed, + "roles": roles, + } + state = { "enroll": { "version": get_enroll_version(), + "harvest_time": time.time_ns(), }, "host": { "hostname": os.uname().nodename, @@ -1491,16 +1543,19 @@ def harvest( "pkg_backend": backend.name, "os_release": platform.os_release, }, - "users": asdict(users_snapshot), - "services": [asdict(s) for s in service_snaps], - "manual_packages": manual_pkgs, - "manual_packages_skipped": manual_pkgs_skipped, - "package_roles": [asdict(p) for p in pkg_snaps], - "apt_config": asdict(apt_config_snapshot), - "dnf_config": asdict(dnf_config_snapshot), - "etc_custom": asdict(etc_custom_snapshot), - "usr_local_custom": asdict(usr_local_custom_snapshot), - "extra_paths": asdict(extra_paths_snapshot), + "inventory": { + "packages": packages_inventory, + }, + "roles": { + "users": asdict(users_snapshot), + "services": [asdict(s) for s in service_snaps], + "packages": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), + "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), + }, } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 
923040f..8b4008b 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -271,9 +271,7 @@ def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any]) merged = _merge_mappings_overwrite(existing_map, data) - out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( - merged, sort_keys=True - ) + out = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) with open(path, "w", encoding="utf-8") as f: f.write(out) @@ -392,7 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll + return f""" - name: Deploy any systemd unit files (templates) ansible.builtin.template: @@ -477,7 +475,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. """ - return f"""# Generated by enroll + return f""" - name: Install packages for {role} (APT) ansible.builtin.apt: @@ -672,14 +670,16 @@ def _manifest_from_bundle_dir( with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) - services: List[Dict[str, Any]] = state.get("services", []) - package_roles: List[Dict[str, Any]] = state.get("package_roles", []) - users_snapshot: Dict[str, Any] = state.get("users", {}) - apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) - dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) - etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) - usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) - extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) + roles: Dict[str, Any] = state.get("roles") or {} + + services: List[Dict[str, Any]] = roles.get("services", []) + package_roles: List[Dict[str, Any]] = roles.get("packages", []) + users_snapshot: Dict[str, Any] = roles.get("users", {}) + apt_config_snapshot: Dict[str, Any] = roles.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = roles.get("dnf_config", {}) + etc_custom_snapshot: Dict[str, Any] = roles.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = roles.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = roles.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -839,7 +839,6 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll - name: Ensure groups exist ansible.builtin.group: diff --git a/enroll/platform.py b/enroll/platform.py index 998b83d..3c1904b 100644 --- a/enroll/platform.py +++ b/enroll/platform.py @@ -81,6 +81,17 @@ class PackageBackend: def list_manual_packages(self) -> List[str]: # pragma: no cover raise NotImplementedError + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: # pragma: no cover + """Return mapping of package name -> installed instances. + + Each instance is a dict with at least: + - version: package version string + - arch: architecture string + + Backends should be best-effort and return an empty mapping on failure. 
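+
+        Illustrative shape (package name and version are examples only):
+            {"bash": [{"version": "5.2.15-2", "arch": "amd64"}]}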
+ """ + raise NotImplementedError + def build_etc_index( self, ) -> Tuple[ @@ -121,6 +132,11 @@ class DpkgBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .debian import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .debian import build_dpkg_etc_index @@ -194,6 +210,11 @@ class RpmBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .rpm import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .rpm import build_rpm_etc_index diff --git a/enroll/rpm.py b/enroll/rpm.py index 947617c..9e2892f 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -142,6 +142,63 @@ def list_manual_packages() -> List[str]: return [] +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses `rpm -qa` and is expected to work on RHEL/Fedora-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + + The version string is formatted as: + - "-" for typical packages + - ":-" if a non-zero epoch is present + """ + + try: + _, out = _run( + [ + "rpm", + "-qa", + "--qf", + "%{NAME}\t%{EPOCHNUM}\t%{VERSION}\t%{RELEASE}\t%{ARCH}\n", + ], + allow_fail=False, + merge_err=True, + ) + except Exception: + return {} + + pkgs: Dict[str, List[Dict[str, str]]] = {} + for raw in (out or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 5: + continue + name, epoch, ver, rel, arch = [p.strip() for p in parts[:5]] + if not name or not ver: + continue + + # Normalise epoch. + epoch = epoch.strip() + if epoch.lower() in ("(none)", "none", ""): + epoch = "0" + + v = f"{ver}-{rel}" if rel else ver + if epoch and epoch.isdigit() and epoch != "0": + v = f"{epoch}:{v}" + + pkgs.setdefault(name, []).append({"version": v, "arch": arch}) + + for k in list(pkgs.keys()): + pkgs[k] = sorted( + pkgs[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return pkgs + + def _walk_etc_files() -> List[str]: out: List[str] = [] for dirpath, _, filenames in os.walk("/etc"): From 081739fd19ba4983fa00b28c9d6969e40bef712d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:35:21 +1100 Subject: [PATCH 19/69] Fix tests --- enroll/manifest.py | 15 +- enroll/rpm.py | 2 +- tests/test_diff_usr_local_custom.py | 147 ++++++---- tests/test_harvest.py | 42 ++- tests/test_jinjaturtle.py | 104 ++++--- tests/test_manifest.py | 425 +++++++++++++++++----------- 6 files changed, 457 insertions(+), 278 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 8b4008b..bc629bb 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -390,9 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f""" - -- name: Deploy any systemd unit files (templates) + return f"""- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -475,9 +473,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. 
""" - return f""" - -- name: Install packages for {role} (APT) + return f"""- name: Install packages for {role} (APT) ansible.builtin.apt: name: "{{{{ {var_prefix}_packages | default([]) }}}}" state: present @@ -995,7 +991,7 @@ Generated non-system user accounts and SSH public material. else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1297,7 +1293,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1663,8 +1659,7 @@ User-requested extra file harvesting. ) task_parts.append( - f""" -- name: Probe whether systemd unit exists and is manageable + f"""- name: Probe whether systemd unit exists and is manageable ansible.builtin.systemd: name: "{{{{ {var_prefix}_unit_name }}}}" check_mode: true diff --git a/enroll/rpm.py b/enroll/rpm.py index 9e2892f..0314670 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -104,7 +104,7 @@ def list_manual_packages() -> List[str]: if pkgs: return _dedupe(pkgs) - # Fallback: human-oriented output. + # Fallback rc, out = _run( ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True ) diff --git a/tests/test_diff_usr_local_custom.py b/tests/test_diff_usr_local_custom.py index 88d594f..28ec57c 100644 --- a/tests/test_diff_usr_local_custom.py +++ b/tests/test_diff_usr_local_custom.py @@ -18,65 +18,106 @@ def test_diff_includes_usr_local_custom_files(tmp_path: Path): new = tmp_path / "new" old_state = { - "host": {"hostname": "h1", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": ["curl"], - "manual_packages_skipped": [], - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "schema_version": 3, + "host": {"hostname": "h1", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "curl": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": 
[], + "notes": [], + }, }, } + new_state = { **old_state, - "manual_packages": ["curl", "htop"], - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "inventory": { + "packages": { + **old_state["inventory"]["packages"], + "htop": { + "version": "3.0", + "arches": [], + "installations": [{"version": "3.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + **old_state["roles"], + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, }, } diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a0d22ec..1b884aa 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -30,6 +30,7 @@ class FakeBackend: owner_fn, modified_by_pkg: dict[str, dict[str, str]] | None = None, pkg_config_prefixes: tuple[str, ...] = ("/etc/apt/",), + installed: dict[str, list[dict[str, str]]] | None = None, ): self.name = name self.pkg_config_prefixes = pkg_config_prefixes @@ -40,6 +41,7 @@ class FakeBackend: self._manual = manual_pkgs self._owner_fn = owner_fn self._modified_by_pkg = modified_by_pkg or {} + self._installed = installed or {} def build_etc_index(self): return ( @@ -55,6 +57,14 @@ class FakeBackend: def list_manual_packages(self): return list(self._manual) + def installed_packages(self): + """Return mapping package -> installations. + + The real backends return: + {"pkg": [{"version": "...", "arch": "..."}, ...]} + """ + return dict(self._installed) + def specific_paths_for_hints(self, hints: set[str]): return [] @@ -214,26 +224,36 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) - assert "openvpn" in st["manual_packages"] - assert "curl" in st["manual_packages"] - assert "openvpn" in st["manual_packages_skipped"] - assert all(pr["package"] != "openvpn" for pr in st["package_roles"]) - assert any(pr["package"] == "curl" for pr in st["package_roles"]) + inv = st["inventory"]["packages"] + assert "openvpn" in inv + assert "curl" in inv + + # openvpn is managed by the service role, so it should NOT appear as a package role. + pkg_roles = st["roles"]["packages"] + assert all(pr["package"] != "openvpn" for pr in pkg_roles) + assert any(pr["package"] == "curl" for pr in pkg_roles) + + # Inventory provenance: openvpn should be observed via systemd unit. 
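+    # Each entry is shaped like {"kind": "systemd_unit", "ref": "openvpn.service"}.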
+ openvpn_obs = inv["openvpn"]["observed_via"] + assert any( + o.get("kind") == "systemd_unit" and o.get("ref") == "openvpn.service" + for o in openvpn_obs + ) # Service role captured modified conffile - svc = st["services"][0] + svc = st["roles"]["services"][0] assert svc["unit"] == "openvpn.service" assert "openvpn" in svc["packages"] assert any(mf["path"] == "/etc/openvpn/server.conf" for mf in svc["managed_files"]) # Unowned /etc/default/keyboard is attributed to etc_custom only - etc_custom = st["etc_custom"] + etc_custom = st["roles"]["etc_custom"] assert any( mf["path"] == "/etc/default/keyboard" for mf in etc_custom["managed_files"] ) # /usr/local content is attributed to usr_local_custom - ul = st["usr_local_custom"] + ul = st["roles"]["usr_local_custom"] assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) @@ -338,10 +358,12 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. - svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + svc_ntpsec = next(s for s in st["roles"]["services"] if s["role_name"] == "ntpsec") assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) - svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + svc_apparmor = next( + s for s in st["roles"]["services"] if s["role_name"] == "apparmor" + ) assert all( mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] ) diff --git a/tests/test_jinjaturtle.py b/tests/test_jinjaturtle.py index 68bb04c..c0447b1 100644 --- a/tests/test_jinjaturtle.py +++ b/tests/test_jinjaturtle.py @@ -24,44 +24,78 @@ def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw( ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "disabled", - "condition_result": "no", - "managed_files": [ - { - "path": "/etc/foo.ini", - "src_rel": "etc/foo.ini", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "modified_conffile", - } - ], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "disabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.ini", + "src_rel": "etc/foo.ini", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": 
[], + "notes": [], + } + ], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index cbfc208..fec9cc3 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -13,95 +13,136 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [ + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + }, + "curl": { + "version": "8.0", + "arches": [], + "installations": [{"version": "8.0", "arch": "amd64"}], + "observed_via": [{"kind": "package_role", "ref": "curl"}], + "roles": ["curl"], + }, + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [ + { + "name": "alice", + "uid": 1000, + "gid": 1000, + "gecos": "Alice", + "home": "/home/alice", + "shell": "/bin/bash", + "primary_group": "alice", + "supplementary_groups": ["docker", "qubes"], + } + ], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [ { - "name": "alice", - "uid": 1000, - "gid": 1000, - "gecos": "Alice", - "home": "/home/alice", - "shell": "/bin/bash", - "primary_group": "alice", - "supplementary_groups": ["docker", "qubes"], + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "enabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "packages": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "package": "curl", + "role_name": "curl", + "managed_files": [], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "enabled", - "condition_result": "no", + 
"apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": [ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [ - { - "package": "curl", - "role_name": "curl", + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], "managed_files": [], "excluded": [], "notes": [], - } - ], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -189,68 +230,102 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "active", + "sub_state": "running", + "unit_file_state": "enabled", + "condition_result": "yes", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - } - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "active", - "sub_state": "running", - "unit_file_state": "enabled", - "condition_result": "yes", + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": 
[ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -337,58 +412,70 @@ def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): ) state = { + "schema_version": 3, "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": [], - "manual_packages_skipped": [], - "apt_config": { - "role_name": "apt_config", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "dnf_config": { - "role_name": "dnf_config", - "managed_files": [ - { - "path": "/etc/dnf/dnf.conf", - "src_rel": "etc/dnf/dnf.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "dnf_config", + "inventory": { + "packages": { + "dnf": { + "version": "4.0", + "arches": [], + "installations": [{"version": "4.0", "arch": "x86_64"}], + "observed_via": [{"kind": "dnf_config"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "extra_paths": { - "role_name": "extra_paths", - "include_patterns": [], - "exclude_patterns": [], - "managed_files": [], - "excluded": [], - "notes": [], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + "src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, }, } From f01603dac484ab5c2d835d60e3edf510577cb6d9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:19:59 +1100 Subject: [PATCH 20/69] Better attribution of config files to parent service/role (not systemd helpers) --- enroll/harvest.py | 108 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 28 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 4ca3984..74ac516 
100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -676,6 +676,10 @@ def harvest( backend.build_etc_index() ) + # Global de-duplication across roles: each absolute path is captured at most once. + # This avoids multiple Ansible roles managing the same destination file. + captured_global: Set[str] = set() + # ------------------------- # Service roles # ------------------------- @@ -685,8 +689,45 @@ def harvest( service_role_aliases: Dict[str, Set[str]] = {} # De-dupe per-role captures (avoids duplicate tasks in manifest generation). seen_by_role: Dict[str, Set[str]] = {} - for unit in list_enabled_services(): + # Managed/excluded lists keyed by role so helper services can attribute shared + # configuration to their parent service role. + managed_by_role: Dict[str, List[ManagedFile]] = {} + excluded_by_role: Dict[str, List[ExcludedFile]] = {} + + enabled_services = list_enabled_services() + enabled_set = set(enabled_services) + + def _service_sort_key(unit: str) -> tuple[int, str, str]: + # Prefer "parent" services over helpers (e.g. NetworkManager.service before + # NetworkManager-dispatcher.service) so shared config lands in the main role. + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + return (base.count("-"), base.lower(), unit.lower()) + + def _parent_service_unit(unit: str) -> Optional[str]: + # If unit name contains '-' segments, treat dashed prefixes as potential parents. + # Example: NetworkManager-dispatcher.service -> NetworkManager.service (if enabled). + if not unit.endswith(".service"): + return None + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + parts = base.split("-") + for i in range(len(parts) - 1, 0, -1): + cand = "-".join(parts[:i]) + ".service" + if cand in enabled_set: + return cand + return None + + parent_unit_for: Dict[str, str] = {} + for u in enabled_services: + pu = _parent_service_unit(u) + if pu: + parent_unit_for[u] = pu + + for unit in sorted(enabled_services, key=_service_sort_key): role = _role_name_from_unit(unit) + parent_unit = parent_unit_for.get(unit) + parent_role = _role_name_from_unit(parent_unit) if parent_unit else None try: ui = get_unit_info(unit) @@ -695,6 +736,8 @@ def harvest( # shared snippets can still be attributed to this role by name. service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) seen_by_role.setdefault(role, set()) + managed = managed_by_role.setdefault(role, []) + excluded = excluded_by_role.setdefault(role, []) service_snaps.append( ServiceSnapshot( unit=unit, @@ -704,8 +747,8 @@ def harvest( sub_state=None, unit_file_state=None, condition_result=None, - managed_files=[], - excluded=[], + managed_files=managed, + excluded=excluded, notes=[str(e)], ) ) @@ -713,8 +756,8 @@ def harvest( pkgs: Set[str] = set() notes: List[str] = [] - excluded: List[ExcludedFile] = [] - managed: List[ManagedFile] = [] + excluded = excluded_by_role.setdefault(role, []) + managed = managed_by_role.setdefault(role, []) candidates: Dict[str, str] = {} if ui.fragment_path: @@ -810,18 +853,31 @@ def harvest( # De-dupe within this role while capturing. This also avoids emitting # duplicate Ansible tasks for the same destination path. - role_seen = seen_by_role.setdefault(role, set()) + # Attribute shared /etc config to the parent service role when this unit looks + # like a helper (e.g. NetworkManager-dispatcher.service -> NetworkManager.service). 
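+        # Drop-ins and environment files (reasons "systemd_dropin"/"systemd_envfile")
+        # stay with the helper unit's own role; only other /etc config is re-homed.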
for path, reason in sorted(candidates.items()): + dest_role = role + if ( + parent_role + and path.startswith("/etc/") + and reason not in ("systemd_dropin", "systemd_envfile") + ): + dest_role = parent_role + + dest_managed = managed_by_role.setdefault(dest_role, []) + dest_excluded = excluded_by_role.setdefault(dest_role, []) + dest_seen = seen_by_role.setdefault(dest_role, set()) _capture_file( bundle_dir=bundle_dir, - role_name=role, + role_name=dest_role, abs_path=path, reason=reason, policy=policy, path_filter=path_filter, - managed_out=managed, - excluded_out=excluded, - seen_role=role_seen, + managed_out=dest_managed, + excluded_out=dest_excluded, + seen_role=dest_seen, + seen_global=captured_global, ) service_snaps.append( @@ -857,7 +913,7 @@ def harvest( s.unit: s for s in service_snaps } - for t in enabled_timers: + for t in sorted(enabled_timers): try: ti = get_timer_info(t) except Exception: # nosec @@ -895,6 +951,7 @@ def harvest( managed_out=snap.managed_files, excluded_out=snap.excluded, seen_role=role_seen, + seen_global=captured_global, ) continue @@ -935,7 +992,7 @@ def harvest( manual_pkgs_skipped: List[str] = [] pkg_snaps: List[PackageSnapshot] = [] - for pkg in manual_pkgs: + for pkg in sorted(manual_pkgs): if pkg in covered_by_services: manual_pkgs_skipped.append(pkg) continue @@ -997,6 +1054,7 @@ def harvest( managed_out=managed, excluded_out=excluded, seen_role=role_seen, + seen_global=captured_global, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -1060,6 +1118,7 @@ def harvest( managed_out=users_managed, excluded_out=users_excluded, seen_role=users_role_seen, + seen_global=captured_global, ) users_snapshot = UsersSnapshot( @@ -1098,6 +1157,7 @@ def harvest( managed_out=apt_managed, excluded_out=apt_excluded, seen_role=apt_role_seen, + seen_global=captured_global, ) elif backend.name == "rpm": dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) @@ -1112,6 +1172,7 @@ def harvest( managed_out=dnf_managed, excluded_out=dnf_excluded, seen_role=dnf_role_seen, + seen_global=captured_global, ) apt_config_snapshot = AptConfigSnapshot( @@ -1135,20 +1196,9 @@ def harvest( etc_managed: List[ManagedFile] = [] etc_role_name = "etc_custom" - # Build a set of files already captured by other roles. - already: Set[str] = set() - for s in service_snaps: - for mf in s.managed_files: - already.add(mf.path) - for p in pkg_snaps: - for mf in p.managed_files: - already.add(mf.path) - for mf in users_managed: - already.add(mf.path) - for mf in apt_managed: - already.add(mf.path) - for mf in dnf_managed: - already.add(mf.path) + # Files already captured by earlier roles. Use the global set so we never + # end up with the same destination path managed by multiple roles. + already: Set[str] = captured_global # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. 
svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1288,7 +1338,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ) # Walk /etc for remaining unowned config-ish files @@ -1327,7 +1377,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ): scanned += 1 if scanned >= MAX_FILES_CAP: @@ -1396,6 +1446,7 @@ def harvest( managed_out=ul_managed, excluded_out=ul_excluded, seen_role=role_seen, + seen_global=captured_global, metadata=(owner, group, mode), ): already_all.add(path) @@ -1470,6 +1521,7 @@ def harvest( managed_out=extra_managed, excluded_out=extra_excluded, seen_role=extra_role_seen, + seen_global=captured_global, ): already_all.add(path) From e44e4aaf3aa554daf5128d0cfa4720cd9d0f7f03 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:39:39 +1100 Subject: [PATCH 21/69] 0.2.0 --- CHANGELOG.md | 1 + debian/changelog | 9 ++++++++- pyproject.toml | 2 +- rpm/enroll.spec | 6 +++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f92e0b7..49217f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * Add version CLI arg * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions # 0.1.7 diff --git a/debian/changelog b/debian/changelog index eabdefc..f050e7f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,12 @@ -enroll (0.1.7) unstable; urgency=medium +enroll (0.2.0) unstable; urgency=medium + * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions + + -- Miguel Jacq Mon, 29 Dec 2025 17:30:00 +1100 + +enroll (0.1.7) unstable; urgency=medium * Fix an attribution bug for certain files ending up in the wrong package/role. -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 diff --git a/pyproject.toml b/pyproject.toml index ca875e8..683a9b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.7" +version = "0.2.0" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index f63a12c..3beac03 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.7 +%global upstream_version 0.2.0 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Mon Dec 29 2025 Miguel Jacq - %{version}-%{release} +- Add version CLI arg +- Add ability to enroll RH-style systems (DNF5/DNF/RPM) +- Refactor harvest state to track package versions * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Fix an attribution bug for certain files ending up in the wrong package/role. 
* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} From e4887b7add36f3e926f7362e3e159fd9c523beeb Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 11:02:30 +1100 Subject: [PATCH 22/69] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d075951..f4920b5 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ sudo apt update sudo apt install enroll ``` -### Fedora 42 +## Fedora ```bash sudo rpm --import https://mig5.net/static/mig5.asc From 09438246ae0557185c3343c0db6e0101f2d75385 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 15:24:21 +1100 Subject: [PATCH 23/69] Build for Fedora 43 --- Dockerfile.rpmbuild | 8 +++++--- release.sh | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild index c928cea..f76a673 100644 --- a/Dockerfile.rpmbuild +++ b/Dockerfile.rpmbuild @@ -1,5 +1,6 @@ # syntax=docker/dockerfile:1 -FROM fedora:42 +ARG BASE_IMAGE=fedora:42 +FROM ${BASE_IMAGE} RUN set -eux; \ dnf -y update; \ @@ -34,11 +35,12 @@ SRC="${SRC:-/src}" WORKROOT="${WORKROOT:-/work}" OUT="${OUT:-/out}" DEPS_DIR="${DEPS_DIR:-/deps}" - +VERSION_ID="$(grep VERSION_ID /etc/os-release | cut -d= -f2)" +echo "Version ID is ${VERSION_ID}" # Install jinjaturtle from local rpm # Filter out .src.rpm and debug* subpackages if present. if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then - mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)' | grep "${VERSION_ID}") if [ "${#rpms[@]}" -gt 0 ]; then echo "Installing dependency RPMs from ${DEPS_DIR}:" printf ' - %s\n' "${rpms[@]}" diff --git a/release.sh b/release.sh index fdbe771..0a052c7 100755 --- a/release.sh +++ b/release.sh @@ -44,31 +44,46 @@ for dist in ${DISTS[@]}; do done # RPM -sudo apt-get -y install createrepo-c rpm -docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . -docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 -sudo chown -R "${USER}" "$PWD/dist" - REPO_ROOT="${HOME}/git/repo_rpm" RPM_REPO="${REPO_ROOT}/rpm/x86_64" BUILD_OUTPUT="${HOME}/git/enroll/dist" REMOTE="letessier.mig5.net:/opt/repo_rpm" KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" -echo "==> Updating RPM repo..." mkdir -p "$RPM_REPO" +sudo apt-get -y install createrepo-c rpm -for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do - rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +DISTS=( + fedora:43 + fedora:42 +) + +for dist in ${DISTS[@]}; do + release=$(echo ${dist} | cut -d: -f2) + docker build \ + --no-cache \ + -f Dockerfile.rpmbuild \ + -t enroll-rpm:${release} \ + --progress=plain \ + --build-arg BASE_IMAGE=${dist} \ + . + + docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} + sudo chown -R "${USER}" "$PWD/dist" + + echo "==> Updating RPM repo..." + for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" + done + + cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + + createrepo_c "$RPM_REPO" + + echo "==> Signing repomd.xml..." 
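+    # Detached, ASCII-armoured signature over repomd.xml so clients configured
+    # with repo_gpgcheck=1 can verify the repo metadata.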
+ qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" done -cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" - -createrepo_c "$RPM_REPO" - -echo "==> Signing repomd.xml..." -qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" - echo "==> Syncing repo to server..." rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" From 781efef4678d4ee1d176a264d62423aefe6680b6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 20:19:47 +1100 Subject: [PATCH 24/69] Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook --- CHANGELOG.md | 4 ++++ enroll/manifest.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49217f0..8283b5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.1 + + * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + # 0.2.0 * Add version CLI arg diff --git a/enroll/manifest.py b/enroll/manifest.py index bc629bb..839ebab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -1551,8 +1551,6 @@ User-requested extra file harvesting. manifested_extra_paths_roles.append(role) - manifested_usr_local_custom_roles.append(role) - # ------------------------- # Service roles # ------------------------- From c88405ef01510b554846b55a5d3dd9593bb46352 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 21:10:32 +1100 Subject: [PATCH 25/69] Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files --- CHANGELOG.md | 1 + enroll/fsutil.py | 2 +- enroll/harvest.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++ enroll/ignore.py | 30 ++++++++++++++++++ enroll/manifest.py | 65 +++++++++++++++++++++++++++++++++++--- 5 files changed, 170 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8283b5b..3c41210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + * Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files # 0.2.0 diff --git a/enroll/fsutil.py b/enroll/fsutil.py index 3d18df6..c852b9e 100644 --- a/enroll/fsutil.py +++ b/enroll/fsutil.py @@ -24,7 +24,7 @@ def stat_triplet(path: str) -> Tuple[str, str, str]: mode is a zero-padded octal string (e.g. "0644"). 
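+    Since the mask is 0o7777, setuid/setgid/sticky bits are preserved
+    (e.g. "4755" for a setuid executable).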
""" st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) + mode = oct(st.st_mode & 0o7777)[2:].zfill(4) import grp import pwd diff --git a/enroll/harvest.py b/enroll/harvest.py index 74ac516..98e1404 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -34,6 +34,15 @@ class ManagedFile: reason: str +@dataclass +class ManagedDir: + path: str + owner: str + group: str + mode: str + reason: str + + @dataclass class ExcludedFile: path: str @@ -109,6 +118,7 @@ class ExtraPathsSnapshot: role_name: str include_patterns: List[str] exclude_patterns: List[str] + managed_dirs: List[ManagedDir] managed_files: List[ManagedFile] excluded: List[ExcludedFile] notes: List[str] @@ -1484,12 +1494,78 @@ def harvest( extra_notes: List[str] = [] extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] + extra_managed_dirs: List[ManagedDir] = [] + extra_dir_seen: Set[str] = set() + + def _walk_and_capture_dirs(root: str) -> None: + root = os.path.normpath(root) + if not root.startswith("/"): + root = "/" + root + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, _ in os.walk(root, followlinks=False): + if len(extra_managed_dirs) >= MAX_FILES_CAP: + extra_notes.append( + f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}." + ) + return + dirpath = os.path.normpath(dirpath) + if not dirpath.startswith("/"): + dirpath = "/" + dirpath + if path_filter.is_excluded(dirpath): + # Prune excluded subtrees. + dirnames[:] = [] + continue + if os.path.islink(dirpath) or not os.path.isdir(dirpath): + dirnames[:] = [] + continue + + if dirpath not in extra_dir_seen: + deny = policy.deny_reason_dir(dirpath) + if not deny: + try: + owner, group, mode = stat_triplet(dirpath) + extra_managed_dirs.append( + ManagedDir( + path=dirpath, + owner=owner, + group=group, + mode=mode, + reason="user_include_dir", + ) + ) + except OSError: + pass + extra_dir_seen.add(dirpath) + + # Prune excluded dirs and symlinks early. + pruned: List[str] = [] + for d in dirnames: + p = os.path.join(dirpath, d) + if os.path.islink(p) or path_filter.is_excluded(p): + continue + pruned.append(d) + dirnames[:] = pruned + extra_role_name = "extra_paths" extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) + # If any include pattern points at a directory, capture that directory tree's + # ownership/mode so the manifest can recreate it accurately. + include_pats = path_filter.iter_include_patterns() + for pat in include_pats: + if pat.kind == "prefix": + p = pat.value + if os.path.isdir(p) and not os.path.islink(p): + _walk_and_capture_dirs(p) + elif pat.kind == "glob": + for h in glob.glob(pat.value, recursive=True): + if os.path.isdir(h) and not os.path.islink(h): + _walk_and_capture_dirs(h) + if include_specs: extra_notes.append("User include patterns:") extra_notes.extend([f"- {p}" for p in include_specs]) @@ -1529,6 +1605,7 @@ def harvest( role_name=extra_role_name, include_patterns=include_specs, exclude_patterns=exclude_specs, + managed_dirs=extra_managed_dirs, managed_files=extra_managed, excluded=extra_excluded, notes=extra_notes, diff --git a/enroll/ignore.py b/enroll/ignore.py index 904997f..895c030 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -137,3 +137,33 @@ class IgnorePolicy: return "sensitive_content" return None + + def deny_reason_dir(self, path: str) -> Optional[str]: + """Directory-specific deny logic. 
+ + deny_reason() is file-oriented (it rejects directories as "not_regular_file"). + For directory metadata capture (so roles can recreate directory trees), we need + a lighter-weight check: + - apply deny_globs (unless dangerous) + - require the path to be a real directory (no symlink) + - ensure it's stat'able/readable + + No size checks or content scanning are performed for directories. + """ + if not self.dangerous: + for g in self.deny_globs or []: + if fnmatch.fnmatch(path, g): + return "denied_path" + + try: + os.stat(path, follow_symlinks=True) + except OSError: + return "unreadable" + + if os.path.islink(path): + return "symlink" + + if not os.path.isdir(path): + return "not_directory" + + return None diff --git a/enroll/manifest.py b/enroll/manifest.py index 839ebab..a373773 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -344,6 +344,29 @@ def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: f.write(out) +def _build_managed_dirs_var( + managed_dirs: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """Convert enroll managed_dirs into an Ansible-friendly list of dicts. + + Each dict drives a role task loop and is safe across hosts. + """ + out: List[Dict[str, Any]] = [] + for d in managed_dirs: + dest = d.get("path") or "" + if not dest: + continue + out.append( + { + "dest": dest, + "owner": d.get("owner") or "root", + "group": d.get("group") or "root", + "mode": d.get("mode") or "0755", + } + ) + return out + + def _build_managed_files_var( managed_files: List[Dict[str, Any]], templated_src_rels: Set[str], @@ -390,7 +413,22 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""- name: Deploy any systemd unit files (templates) + return f"""- name: Ensure managed directories exist (preserve owner/group/mode) + ansible.builtin.file: + path: "{{{{ item.dest }}}}" + state: directory + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" + +- name: Ensure destination directories exist + ansible.builtin.file: + path: "{{{{ item.dest | dirname }}}}" + state: directory + loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" + +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -1444,13 +1482,17 @@ Unowned /etc config files not attributed to packages or services. # ------------------------- # extra_paths role (user-requested includes) # ------------------------- - if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + if extra_paths_snapshot and ( + extra_paths_snapshot.get("managed_files") + or extra_paths_snapshot.get("managed_dirs") + ): role = extra_paths_snapshot.get("role_name", "extra_paths") role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) var_prefix = role + managed_dirs = extra_paths_snapshot.get("managed_dirs", []) or [] managed_files = extra_paths_snapshot.get("managed_files", []) excluded = extra_paths_snapshot.get("excluded", []) notes = extra_paths_snapshot.get("notes", []) @@ -1489,12 +1531,23 @@ Unowned /etc config files not attributed to packages or services. 
notify_systemd=None,
         )
 
+        dirs_var = _build_managed_dirs_var(managed_dirs)
+
         jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {}
-        vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var}
+        vars_map: Dict[str, Any] = {
+            f"{var_prefix}_managed_dirs": dirs_var,
+            f"{var_prefix}_managed_files": files_var,
+        }
         vars_map = _merge_mappings_overwrite(vars_map, jt_map)
 
         if site_mode:
-            _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []})
+            _write_role_defaults(
+                role_dir,
+                {
+                    f"{var_prefix}_managed_dirs": [],
+                    f"{var_prefix}_managed_files": [],
+                },
+            )
             _write_hostvars(out_dir, fqdn or "", role, vars_map)
         else:
             _write_role_defaults(role_dir, vars_map)
@@ -1530,6 +1583,10 @@ User-requested extra file harvesting.
 """
     + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)")
     + """\n
+## Managed directories
+"""
+    + ("\n".join([f"- {d.get('path')}" for d in managed_dirs]) or "- (none)")
+    + """\n
 ## Managed files
 """
     + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)")

From 29b52d451d4d477ea2f9d05fdc5c85fe8f8ecd16 Mon Sep 17 00:00:00 2001
From: Miguel Jacq 
Date: Fri, 2 Jan 2026 21:29:16 +1100
Subject: [PATCH 26/69] 0.2.1

---
 debian/changelog | 7 +++++++
 pyproject.toml | 2 +-
 rpm/enroll.spec | 5 ++++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index f050e7f..dbc7548 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+enroll (0.2.1) unstable; urgency=medium
+
+  * Don't accidentally add the extra_paths role to the usr_local_custom list, which resulted in extra_paths appearing twice in the manifested playbook
+  * Ensure directories in the tree of anything included with --include-path are defined in the state and manifest, so we create directories before we try to create the files inside them
+
+ -- Miguel Jacq  Fri, 01 Jan 2026 21:30:00 +1100
+
 enroll (0.2.0) unstable; urgency=medium
 
 * Add version CLI arg
diff --git a/pyproject.toml b/pyproject.toml
index 683a9b2..34f411e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "enroll"
-version = "0.2.0"
+version = "0.2.1"
 description = "Enroll a server's running state retrospectively into Ansible"
 authors = ["Miguel Jacq "]
 license = "GPL-3.0-or-later"
diff --git a/rpm/enroll.spec b/rpm/enroll.spec
index 3beac03..8fc8cac 100644
--- a/rpm/enroll.spec
+++ b/rpm/enroll.spec
@@ -1,4 +1,4 @@
-%global upstream_version 0.2.0
+%global upstream_version 0.2.1

Name: enroll
Version: %{upstream_version}
@@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible.

%{_bindir}/enroll

%changelog
+* Fri Jan 01 2026 Miguel Jacq - %{version}-%{release}
+- Don't accidentally add the extra_paths role to the usr_local_custom list, which resulted in extra_paths appearing twice in the manifested playbook
+- Ensure directories in the tree of anything included with --include-path are defined in the state and manifest, so we create directories before we try to create the files inside them
* Mon Dec 29 2025 Miguel Jacq - %{version}-%{release}
- Add version CLI arg
- Add ability to enroll RH-style systems (DNF5/DNF/RPM)

From 824010b2ab15865b0c1845d8cc9e67a80c7accf2 Mon Sep 17 00:00:00 2001
From: Miguel Jacq 
Date: Sat, 3 Jan 2026 11:39:57 +1100
Subject: [PATCH 27/69] Several bug fixes and prep for 0.2.2

- Fix stat() of the parent directory so that we set directory perms correctly on --include-path paths.
- Request a PTY for remote calls when sudo is required, to support systems that restrict sudo without a PTY.
---
 CHANGELOG.md | 5 ++
 README.md | 2 +-
 debian/changelog | 7 ++
 enroll/harvest.py | 171 +++++++++++++++++++++++++++++++++++++++------
 enroll/manifest.py | 68 ++++++++++++++----
 enroll/remote.py | 34 +++++----
 pyproject.toml | 2 +-
 release.sh | 16 ++---
 rpm/enroll.spec | 5 +-
 9 files changed, 249 insertions(+), 61 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c41210..0740cb4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# 0.2.2
+
+ * Fix stat() of the parent directory so that we set directory perms correctly on --include-path paths.
+ * Request a PTY for remote calls when sudo is required, to support systems that restrict sudo without a PTY.
+
 # 0.2.1
 
 * Don't accidentally add the extra_paths role to the usr_local_custom list, which resulted in extra_paths appearing twice in the manifested playbook
diff --git a/README.md b/README.md
index f4920b5..e399633 100644
--- a/README.md
+++ b/README.md
@@ -199,7 +199,7 @@ sudo rpm --import https://mig5.net/static/mig5.asc
 sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF'
 [mig5]
 name=mig5 Repository
-baseurl=https://rpm.mig5.net/rpm/$basearch
+baseurl=https://rpm.mig5.net/rpm/$releasever/$basearch
 enabled=1
 gpgcheck=1
 repo_gpgcheck=1
diff --git a/debian/changelog b/debian/changelog
index dbc7548..8c2f4b9 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+enroll (0.2.2) unstable; urgency=medium
+
+  * Fix stat() of the parent directory so that we set directory perms correctly on --include-path paths.
+  * Request a PTY for remote calls when sudo is required, to support systems that restrict sudo without a PTY.
+
+ -- Miguel Jacq  Sat, 02 Jan 2026 09:56:00 +1100
+
 enroll (0.2.1) unstable; urgency=medium
 
 * Don't accidentally add the extra_paths role to the usr_local_custom list, which resulted in extra_paths appearing twice in the manifested playbook
diff --git a/enroll/harvest.py b/enroll/harvest.py
index 98e1404..7aba7c6 100644
--- a/enroll/harvest.py
+++ b/enroll/harvest.py
@@ -6,7 +6,7 @@ import os
 import re
 import shutil
 import time
-from dataclasses import dataclass, asdict
+from dataclasses import dataclass, asdict, field
 from typing import Dict, List, Optional, Set
 
 from .systemd import (
@@ -58,59 +58,66 @@ class ServiceSnapshot:
     sub_state: Optional[str]
     unit_file_state: Optional[str]
     condition_result: Optional[str]
-    managed_files: List[ManagedFile]
-    excluded: List[ExcludedFile]
-    notes: List[str]
+    managed_dirs: List[ManagedDir] = field(default_factory=list)
+    managed_files: List[ManagedFile] = field(default_factory=list)
+    excluded: List[ExcludedFile] = field(default_factory=list)
+    notes: List[str] = field(default_factory=list)
 
 
 @dataclass
 class PackageSnapshot:
     package: str
     role_name: str
-    managed_files: List[ManagedFile]
-    excluded: List[ExcludedFile]
-    notes: List[str]
+    managed_dirs: List[ManagedDir] = field(default_factory=list)
+    managed_files: List[ManagedFile] = field(default_factory=list)
+    excluded: List[ExcludedFile] = field(default_factory=list)
+    notes: List[str] = field(default_factory=list)
 
 
 @dataclass
 class UsersSnapshot:
     role_name: str
     users: List[dict]
-    managed_files: List[ManagedFile]
-    excluded: List[ExcludedFile]
-    notes: List[str]
+    managed_dirs: List[ManagedDir] = field(default_factory=list)
+    managed_files: List[ManagedFile] = field(default_factory=list)
+    excluded: List[ExcludedFile] = field(default_factory=list)
+    notes: List[str] = field(default_factory=list)
 
 
 @dataclass
 class AptConfigSnapshot:
     role_name: str
-    managed_files: 
List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class DnfConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class EtcCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsrLocalCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass @@ -149,6 +156,71 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 + +def _merge_parent_dirs( + existing_dirs: List[ManagedDir], + managed_files: List[ManagedFile], + *, + policy: IgnorePolicy, +) -> List[ManagedDir]: + """Ensure parent directories for managed_files are present in managed_dirs. + + This is used so the Ansible manifest can create destination directories with + explicit owner/group/mode (ansible-lint friendly) without needing a separate + "mkdir without perms" task. + + We only add the immediate parent directory for each managed file. For + explicit directory includes (extra_paths), existing_dirs will already + contain the walked directory tree. + """ + by_path: Dict[str, ManagedDir] = { + d.path: d for d in (existing_dirs or []) if d.path + } + + for mf in managed_files or []: + p = str(mf.path or "").rstrip("/") + if not p: + continue + dpath = os.path.dirname(p) + if not dpath or dpath == "/": + continue + if dpath in by_path: + continue + + # Directory-deny logic: newer IgnorePolicy implementations provide + # deny_reason_dir(). Older/simple policies (including unit tests) may + # only implement deny_reason(), which is file-oriented and may return + # "not_regular_file" for directories. + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dpath) + else: + deny = policy.deny_reason(dpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None + if deny: + # If the file itself was captured, its parent directory is likely safe, + # but still respect deny globs for directories to avoid managing + # sensitive/forbidden trees. + continue + + try: + owner, group, mode = stat_triplet(dpath) + except OSError: + continue + + by_path[dpath] = ManagedDir( + path=dpath, + owner=owner, + group=group, + mode=mode, + reason="parent_of_managed_file", + ) + + return [by_path[k] for k in sorted(by_path)] + + # Directories that are shared across many packages. # Never attribute all unowned files in these trees # to one single package. 
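
A minimal sketch of what `_merge_parent_dirs` produces, assuming a permissive `IgnorePolicy`, an invented path that exists on disk, and direct import of the private helper; only the `.path` attribute of each managed file is read, so a `SimpleNamespace` stands in for a full `ManagedFile`:

```python
from types import SimpleNamespace

from enroll.harvest import _merge_parent_dirs
from enroll.ignore import IgnorePolicy

# Hypothetical managed file; /etc/myapp/conf.d must exist for stat_triplet().
files = [SimpleNamespace(path="/etc/myapp/conf.d/app.conf")]

dirs = _merge_parent_dirs([], files, policy=IgnorePolicy())
# -> one ManagedDir for /etc/myapp/conf.d carrying its stat'ed owner/group/mode
#    and reason="parent_of_managed_file", provided the policy does not deny it.
# Only the immediate parent is recorded, never the full ancestor chain; for
# explicit --include-path directories the walked tree already covers the rest.
```
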
@@ -1521,7 +1593,14 @@ def harvest( continue if dirpath not in extra_dir_seen: - deny = policy.deny_reason_dir(dirpath) + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dirpath) + else: + deny = policy.deny_reason(dirpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None if not deny: try: owner, group, mode = stat_triplet(dirpath) @@ -1661,6 +1740,52 @@ def harvest( "roles": roles, } + # Ensure every role has explicit managed_dirs for parent directories of managed files. + # This lets the manifest create directories with owner/group/mode (ansible-lint friendly) + # without a separate "mkdir without perms" task. + users_snapshot.managed_dirs = _merge_parent_dirs( + users_snapshot.managed_dirs, users_snapshot.managed_files, policy=policy + ) + for s in service_snaps: + s.managed_dirs = _merge_parent_dirs( + s.managed_dirs, s.managed_files, policy=policy + ) + for p in pkg_snaps: + p.managed_dirs = _merge_parent_dirs( + p.managed_dirs, p.managed_files, policy=policy + ) + + if apt_config_snapshot: + apt_config_snapshot.managed_dirs = _merge_parent_dirs( + apt_config_snapshot.managed_dirs, + apt_config_snapshot.managed_files, + policy=policy, + ) + if dnf_config_snapshot: + dnf_config_snapshot.managed_dirs = _merge_parent_dirs( + dnf_config_snapshot.managed_dirs, + dnf_config_snapshot.managed_files, + policy=policy, + ) + if etc_custom_snapshot: + etc_custom_snapshot.managed_dirs = _merge_parent_dirs( + etc_custom_snapshot.managed_dirs, + etc_custom_snapshot.managed_files, + policy=policy, + ) + if usr_local_custom_snapshot: + usr_local_custom_snapshot.managed_dirs = _merge_parent_dirs( + usr_local_custom_snapshot.managed_dirs, + usr_local_custom_snapshot.managed_files, + policy=policy, + ) + if extra_paths_snapshot: + extra_paths_snapshot.managed_dirs = _merge_parent_dirs( + extra_paths_snapshot.managed_dirs, + extra_paths_snapshot.managed_files, + policy=policy, + ) + state = { "enroll": { "version": get_enroll_version(), diff --git a/enroll/manifest.py b/enroll/manifest.py index a373773..f30e5f3 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -422,12 +422,6 @@ def _render_generic_files_tasks( mode: "{{{{ item.mode }}}}" loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" -- name: Ensure destination directories exist - ansible.builtin.file: - path: "{{{{ item.dest | dirname }}}}" - state: directory - loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" - - name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" @@ -983,6 +977,7 @@ Generated non-system user accounts and SSH public material. var_prefix = role managed_files = apt_config_snapshot.get("managed_files", []) + managed_dirs = apt_config_snapshot.get("managed_dirs", []) or [] excluded = apt_config_snapshot.get("excluded", []) notes = apt_config_snapshot.get("notes", []) @@ -1019,12 +1014,20 @@ Generated non-system user accounts and SSH public material. 
notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1134,6 +1137,7 @@ APT configuration harvested from the system (sources, pinning, and keyrings). var_prefix = role managed_files = dnf_config_snapshot.get("managed_files", []) + managed_dirs = dnf_config_snapshot.get("managed_dirs", []) or [] excluded = dnf_config_snapshot.get("excluded", []) notes = dnf_config_snapshot.get("notes", []) @@ -1169,12 +1173,20 @@ APT configuration harvested from the system (sources, pinning, and keyrings). notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1285,6 +1297,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP var_prefix = role managed_files = etc_custom_snapshot.get("managed_files", []) + managed_dirs = etc_custom_snapshot.get("managed_dirs", []) or [] excluded = etc_custom_snapshot.get("excluded", []) notes = etc_custom_snapshot.get("notes", []) @@ -1321,12 +1334,20 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1395,6 +1416,7 @@ Unowned /etc config files not attributed to packages or services. var_prefix = role managed_files = usr_local_custom_snapshot.get("managed_files", []) + managed_dirs = usr_local_custom_snapshot.get("managed_dirs", []) or [] excluded = usr_local_custom_snapshot.get("excluded", []) notes = usr_local_custom_snapshot.get("notes", []) @@ -1431,12 +1453,20 @@ Unowned /etc config files not attributed to packages or services. 
notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1616,6 +1646,7 @@ User-requested extra file harvesting. unit = svc["unit"] pkgs = svc.get("packages", []) or [] managed_files = svc.get("managed_files", []) or [] + managed_dirs = svc.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1660,11 +1691,14 @@ User-requested extra file harvesting. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, f"{var_prefix}_manage_unit": True, f"{var_prefix}_systemd_enabled": bool(enabled_at_harvest), f"{var_prefix}_systemd_state": desired_state, @@ -1679,6 +1713,7 @@ User-requested extra file harvesting. f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], f"{var_prefix}_manage_unit": False, f"{var_prefix}_systemd_enabled": False, f"{var_prefix}_systemd_state": "stopped", @@ -1782,6 +1817,7 @@ Generated from `{unit}`. role = pr["role_name"] pkg = pr.get("package") or "" managed_files = pr.get("managed_files", []) or [] + managed_dirs = pr.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1823,10 +1859,13 @@ Generated from `{unit}`. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, } base_vars = _merge_mappings_overwrite(base_vars, jt_map) @@ -1836,6 +1875,7 @@ Generated from `{unit}`. { f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], }, ) _write_hostvars(out_dir, fqdn or "", role, base_vars) diff --git a/enroll/remote.py b/enroll/remote.py index 9618512..b86cd08 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -16,7 +16,6 @@ def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None: Protects against path traversal (e.g. entries containing ../). """ - # Note: tar member names use POSIX separators regardless of platform. dest = dest.resolve() @@ -80,9 +79,18 @@ def _build_enroll_pyz(tmpdir: Path) -> Path: return pyz_path -def _ssh_run(ssh, cmd: str) -> tuple[int, str, str]: - """Run a command over a Paramiko SSHClient.""" - _stdin, stdout, stderr = ssh.exec_command(cmd) +def _ssh_run(ssh, cmd: str, *, get_pty: bool = False) -> tuple[int, str, str]: + """Run a command over a Paramiko SSHClient. + + Paramiko's exec_command runs commands without a TTY by default. 
+ Some hosts have sudoers "requiretty" enabled, which causes sudo to + fail even when passwordless sudo is configured. For those commands, + request a PTY. + + We do not request a PTY for commands that stream binary data + (e.g. tar/gzip output), as a PTY can corrupt the byte stream. + """ + _stdin, stdout, stderr = ssh.exec_command(cmd, get_pty=get_pty) out = stdout.read().decode("utf-8", errors="replace") err = stderr.read().decode("utf-8", errors="replace") rc = stdout.channel.recv_exit_status() @@ -105,7 +113,6 @@ def remote_harvest( Returns the local path to state.json inside local_out_dir. """ - try: import paramiko # type: ignore except Exception as e: @@ -182,34 +189,35 @@ def remote_harvest( for p in exclude_paths or []: argv.extend(["--exclude-path", str(p)]) - _cmd = " ".join(shlex.quote(a) for a in argv) - if not no_sudo: - cmd = f"sudo {_cmd}" - else: - cmd = _cmd - rc, out, err = _ssh_run(ssh, cmd) + _cmd = " ".join(map(shlex.quote, argv)) + cmd = f"sudo {_cmd}" if not no_sudo else _cmd + + # PTY for sudo commands (helps sudoers requiretty). + rc, out, err = _ssh_run(ssh, cmd, get_pty=(not no_sudo)) if rc != 0: raise RuntimeError( "Remote harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) if not no_sudo: - # Ensure user can read the files, before we tar it + # Ensure user can read the files, before we tar it. if not resolved_user: raise RuntimeError( "Unable to determine remote username for chown. " "Pass --remote-user explicitly or use --no-sudo." ) cmd = f"sudo chown -R {resolved_user} {rbundle}" - rc, out, err = _ssh_run(ssh, cmd) + rc, out, err = _ssh_run(ssh, cmd, get_pty=True) if rc != 0: raise RuntimeError( "chown of harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) diff --git a/pyproject.toml b/pyproject.toml index 34f411e..72dd732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.1" +version = "0.2.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/release.sh b/release.sh index 0a052c7..db3f27b 100755 --- a/release.sh +++ b/release.sh @@ -44,14 +44,11 @@ for dist in ${DISTS[@]}; do done # RPM -REPO_ROOT="${HOME}/git/repo_rpm" -RPM_REPO="${REPO_ROOT}/rpm/x86_64" -BUILD_OUTPUT="${HOME}/git/enroll/dist" -REMOTE="letessier.mig5.net:/opt/repo_rpm" -KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" - -mkdir -p "$RPM_REPO" sudo apt-get -y install createrepo-c rpm +BUILD_OUTPUT="${HOME}/git/enroll/dist" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" +REPO_ROOT="${HOME}/git/repo_rpm" +REMOTE="letessier.mig5.net:/opt/repo_rpm" DISTS=( fedora:43 @@ -60,6 +57,10 @@ DISTS=( for dist in ${DISTS[@]}; do release=$(echo ${dist} | cut -d: -f2) + REPO_RELEASE_ROOT="${REPO_ROOT}/fc${release}" + RPM_REPO="${REPO_RELEASE_ROOT}/rpm/x86_64" + mkdir -p "$RPM_REPO" + docker build \ --no-cache \ -f Dockerfile.rpmbuild \ @@ -71,7 +72,6 @@ for dist in ${DISTS[@]}; do docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} sudo chown -R "${USER}" "$PWD/dist" - echo "==> Updating RPM repo..." 
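    # Sign each built RPM with the release key before the repo metadata is rebuilt.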
for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do
     rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file"
 done
diff --git a/rpm/enroll.spec b/rpm/enroll.spec
index 8fc8cac..12286fa 100644
--- a/rpm/enroll.spec
+++ b/rpm/enroll.spec
@@ -1,4 +1,4 @@
-%global upstream_version 0.2.1
+%global upstream_version 0.2.2

Name: enroll
Version: %{upstream_version}
@@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible.

%{_bindir}/enroll

%changelog
+* Sat Jan 02 2026 Miguel Jacq - %{version}-%{release}
+- Fix stat() of the parent directory so that we set directory perms correctly on --include-path paths.
+- Request a PTY for remote calls when sudo is required, to support systems that restrict sudo without a PTY.
 * Fri Jan 01 2026 Miguel Jacq - %{version}-%{release}
 - Don't accidentally add the extra_paths role to the usr_local_custom list, which resulted in extra_paths appearing twice in the manifested playbook
 - Ensure directories in the tree of anything included with --include-path are defined in the state and manifest, so we create directories before we try to create the files inside them

From 6c3275b44a9ca1ebeac4caec02cb650e996837c5 Mon Sep 17 00:00:00 2001
From: Miguel Jacq 
Date: Sat, 3 Jan 2026 11:46:40 +1100
Subject: [PATCH 28/69] Fix tests

---
 tests/test_cli_config_and_sops.py | 189 +++++++++++++++++
 tests/test_more_coverage.py | 323 ++++++++++++++++++++++++++++++
 tests/test_remote.py | 18 +-
 3 files changed, 525 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_cli_config_and_sops.py
 create mode 100644 tests/test_more_coverage.py

diff --git a/tests/test_cli_config_and_sops.py b/tests/test_cli_config_and_sops.py
new file mode 100644
index 0000000..7e3fe5b
--- /dev/null
+++ b/tests/test_cli_config_and_sops.py
@@ -0,0 +1,189 @@
+from __future__ import annotations
+
+import argparse
+import configparser
+import tarfile
+from pathlib import Path
+
+
+def test_discover_config_path_precedence(monkeypatch, tmp_path: Path):
+    from enroll.cli import _discover_config_path
+
+    cfg = tmp_path / "cfg.ini"
+    cfg.write_text("[enroll]\n", encoding="utf-8")
+
+    # --no-config always wins
+    monkeypatch.setenv("ENROLL_CONFIG", str(cfg))
+    assert _discover_config_path(["--no-config", "harvest"]) is None
+
+    # explicit --config wins
+    assert _discover_config_path(["--config", str(cfg), "harvest"]) == cfg
+
+    # env var used when present
+    assert _discover_config_path(["harvest"]) == cfg
+
+
+def test_discover_config_path_finds_local_and_xdg(monkeypatch, tmp_path: Path):
+    from enroll.cli import _discover_config_path
+
+    # local file in cwd
+    cwd = tmp_path / "cwd"
+    cwd.mkdir()
+    local = cwd / "enroll.ini"
+    local.write_text("[enroll]\n", encoding="utf-8")
+
+    monkeypatch.chdir(cwd)
+    monkeypatch.delenv("ENROLL_CONFIG", raising=False)
+    monkeypatch.delenv("XDG_CONFIG_HOME", raising=False)
+    assert _discover_config_path(["harvest"]) == local
+
+    # xdg config fallback
+    monkeypatch.chdir(tmp_path)
+    xdg = tmp_path / "xdg"
+    (xdg / "enroll").mkdir(parents=True)
+    xcfg = xdg / "enroll" / "enroll.ini"
+    xcfg.write_text("[enroll]\n", encoding="utf-8")
+    monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg))
+    assert _discover_config_path(["harvest"]) == xcfg
+
+
+def test_section_to_argv_supports_bool_append_count_and_unknown(monkeypatch, capsys):
+    from enroll.cli import _section_to_argv
+
+    ap = argparse.ArgumentParser(add_help=False)
+    ap.add_argument("--flag", action="store_true")
+    ap.add_argument("--no-flag", action="store_false", dest="flag2")
+    ap.add_argument("--item", action="append", default=[])
+    ap.add_argument("-v", action="count", default=0)
+
+    cfg = 
configparser.ConfigParser() + cfg.read_dict( + { + "enroll": { + "flag": "true", + "no_flag": "false", + "item": "a,b", + "v": "2", + "unknown_key": "zzz", + } + } + ) + + argv = _section_to_argv(ap, cfg, "enroll") + + # bools set + assert "--flag" in argv + assert "--no-flag" in argv + + # append expanded + assert argv.count("--item") == 2 + assert "a" in argv and "b" in argv + + # count flag expanded + assert argv.count("-v") == 2 + + # unknown key prints warning + err = capsys.readouterr().err + assert "unknown option" in err + + +def test_inject_config_argv_inserts_global_and_command_tokens(tmp_path: Path): + from enroll.cli import _inject_config_argv + + root = argparse.ArgumentParser(add_help=False) + root.add_argument("--root-flag", action="store_true") + sub = root.add_subparsers(dest="cmd", required=True) + p_h = sub.add_parser("harvest", add_help=False) + p_h.add_argument("--dangerous", action="store_true") + p_h.add_argument("--include-path", action="append", default=[]) + + cfg_path = tmp_path / "enroll.ini" + cfg_path.write_text( + """[enroll] +root-flag = true + +[harvest] +dangerous = true +include-path = /etc/one,/etc/two +""", + encoding="utf-8", + ) + + argv = ["harvest", "--include-path", "/etc/cli"] + injected = _inject_config_argv( + argv, + cfg_path=cfg_path, + root_parser=root, + subparsers={"harvest": p_h}, + ) + + # global inserted before cmd, subcommand tokens right after cmd + assert injected[:2] == ["--root-flag", "harvest"] + # include-path from config inserted before CLI include-path (CLI wins later if duplicates) + joined = " ".join(injected) + assert "--include-path /etc/one" in joined + assert "--include-path /etc/cli" in joined + + +def test_resolve_sops_out_file_and_encrypt_path(monkeypatch, tmp_path: Path): + from enroll import cli + + # directory output should yield harvest.tar.gz.sops inside + out_dir = tmp_path / "o" + out_dir.mkdir() + assert ( + cli._resolve_sops_out_file(str(out_dir), hint="h").name == "harvest.tar.gz.sops" + ) + + # file-like output retained + out_file = tmp_path / "x.sops" + assert cli._resolve_sops_out_file(str(out_file), hint="h") == out_file + + # None uses cache dir + class HC: + def __init__(self, d: Path): + self.dir = d + + monkeypatch.setattr( + cli, "new_harvest_cache_dir", lambda hint: HC(tmp_path / "cache") + ) + p = cli._resolve_sops_out_file(None, hint="h") + assert str(p).endswith("harvest.tar.gz.sops") + + # Cover _tar_dir_to quickly (writes a tarball) + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + tar_path = tmp_path / "b.tar.gz" + cli._tar_dir_to(bundle, tar_path) + assert tar_path.exists() + with tarfile.open(tar_path, "r:gz") as tf: + names = tf.getnames() + assert "state.json" in names or "./state.json" in names + + +def test_encrypt_harvest_dir_to_sops_cleans_up_tmp_tgz(monkeypatch, tmp_path: Path): + from enroll.cli import _encrypt_harvest_dir_to_sops + + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + out_file = tmp_path / "out.sops" + + seen = {} + + def fake_encrypt(src: Path, dst: Path, pgp_fingerprints, mode): # noqa: ARG001 + # write something so we can see output created + seen["src"] = src + dst.write_bytes(b"enc") + + monkeypatch.setattr("enroll.cli.encrypt_file_binary", fake_encrypt) + + # Make os.unlink raise FileNotFoundError to hit the except branch in finally. 
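+    # (`raise` is a statement and is not allowed in a lambda, so the test raises
+    # via a throwaway generator's throw() method instead.)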
+ monkeypatch.setattr( + "enroll.cli.os.unlink", lambda p: (_ for _ in ()).throw(FileNotFoundError()) + ) + + res = _encrypt_harvest_dir_to_sops(bundle, out_file, fps=["ABC"]) + assert res == out_file + assert out_file.read_bytes() == b"enc" diff --git a/tests/test_more_coverage.py b/tests/test_more_coverage.py new file mode 100644 index 0000000..2c6693a --- /dev/null +++ b/tests/test_more_coverage.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import types +from pathlib import Path +from types import SimpleNamespace + +import pytest + + +def test_cache_dir_defaults_to_home_cache(monkeypatch, tmp_path: Path): + # Ensure default path uses ~/.cache when XDG_CACHE_HOME is unset. + from enroll.cache import enroll_cache_dir + + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + p = enroll_cache_dir() + assert str(p).startswith(str(tmp_path)) + assert p.name == "enroll" + + +def test_harvest_cache_state_json_property(tmp_path: Path): + from enroll.cache import HarvestCache + + hc = HarvestCache(tmp_path / "h1") + assert hc.state_json == hc.dir / "state.json" + + +def test_cache_dir_security_rejects_symlink(tmp_path: Path): + from enroll.cache import _ensure_dir_secure + + real = tmp_path / "real" + real.mkdir() + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + with pytest.raises(RuntimeError, match="Refusing to use symlink"): + _ensure_dir_secure(link) + + +def test_cache_dir_chmod_failures_are_ignored(monkeypatch, tmp_path: Path): + from enroll import cache + + # Make the cache base path deterministic and writable. + monkeypatch.setattr(cache, "enroll_cache_dir", lambda: tmp_path) + + # Force os.chmod to fail to cover the "except OSError: pass" paths. + monkeypatch.setattr( + os, "chmod", lambda *a, **k: (_ for _ in ()).throw(OSError("nope")) + ) + + hc = cache.new_harvest_cache_dir() + assert hc.dir.exists() + assert hc.dir.is_dir() + + +def test_stat_triplet_falls_back_to_numeric_ids(monkeypatch, tmp_path: Path): + from enroll.fsutil import stat_triplet + import pwd + import grp + + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + + # Force username/group resolution failures. 
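+    # With both lookups raising KeyError, stat_triplet() must fall back to the
+    # numeric uid/gid, asserted below as digit strings.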
+ monkeypatch.setattr( + pwd, "getpwuid", lambda _uid: (_ for _ in ()).throw(KeyError("no user")) + ) + monkeypatch.setattr( + grp, "getgrgid", lambda _gid: (_ for _ in ()).throw(KeyError("no group")) + ) + + owner, group, mode = stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert len(mode) == 4 + + +def test_ignore_policy_iter_effective_lines_removes_block_comments(): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + data = b"""keep1 +/* +drop me +*/ +keep2 +""" + assert list(pol.iter_effective_lines(data)) == [b"keep1", b"keep2"] + + +def test_ignore_policy_deny_reason_dir_variants(tmp_path: Path): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + + # denied by glob + assert pol.deny_reason_dir("/etc/shadow") == "denied_path" + + # symlink rejected + d = tmp_path / "d" + d.mkdir() + link = tmp_path / "l" + link.symlink_to(d, target_is_directory=True) + assert pol.deny_reason_dir(str(link)) == "symlink" + + # not a directory + f = tmp_path / "f" + f.write_text("x", encoding="utf-8") + assert pol.deny_reason_dir(str(f)) == "not_directory" + + # ok + assert pol.deny_reason_dir(str(d)) is None + + +def test_run_jinjaturtle_parses_outputs(monkeypatch, tmp_path: Path): + # Fully unit-test enroll.jinjaturtle.run_jinjaturtle by stubbing subprocess.run. + from enroll.jinjaturtle import run_jinjaturtle + + def fake_run(cmd, **kwargs): # noqa: ARG001 + # cmd includes "-d -t