From 043802e80034b998c84a711e77ae8c69c8e0d137 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:10:27 +1100 Subject: [PATCH] Refactor state structure and capture versions of packages --- enroll/debian.py | 44 ++++++++++++++++++ enroll/diff.py | 112 +++++++++++++++++++++++++++++++++++++-------- enroll/harvest.py | 75 ++++++++++++++++++++++++++---- enroll/manifest.py | 27 ++++++----- enroll/platform.py | 21 +++++++++ enroll/rpm.py | 57 +++++++++++++++++++++++ 6 files changed, 294 insertions(+), 42 deletions(-) diff --git a/enroll/debian.py b/enroll/debian.py index 7e1ee2d..9bf847e 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -63,6 +63,50 @@ def list_manual_packages() -> List[str]: return sorted(set(pkgs)) +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses dpkg-query and is expected to work on Debian/Ubuntu-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + """ + + try: + p = subprocess.run( + [ + "dpkg-query", + "-W", + "-f=${Package}\t${Version}\t${Architecture}\n", + ], + text=True, + capture_output=True, + check=False, + ) # nosec + except Exception: + return {} + + out: Dict[str, List[Dict[str, str]]] = {} + for raw in (p.stdout or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 3: + continue + name, ver, arch = parts[0].strip(), parts[1].strip(), parts[2].strip() + if not name: + continue + out.setdefault(name, []).append({"version": ver, "arch": arch}) + + # Stable ordering for deterministic JSON dumps. + for k in list(out.keys()): + out[k] = sorted( + out[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return out + + def build_dpkg_etc_index( info_dir: str = "/var/lib/dpkg/info", ) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]: diff --git a/enroll/diff.py b/enroll/diff.py index 0110d17..5ad0eac 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -126,18 +126,62 @@ def _load_state(bundle_dir: Path) -> Dict[str, Any]: return json.load(f) +def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]: + return (state.get("inventory") or {}).get("packages") or {} + + def _all_packages(state: Dict[str, Any]) -> List[str]: - pkgs = set(state.get("manual_packages", []) or []) - pkgs |= set(state.get("manual_packages_skipped", []) or []) - for s in state.get("services", []) or []: - for p in s.get("packages", []) or []: - pkgs.add(p) - return sorted(pkgs) + return sorted(_packages_inventory(state).keys()) + + +def _roles(state: Dict[str, Any]) -> Dict[str, Any]: + return state.get("roles") or {} + + +def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]: + """Return a stable string used for version comparison.""" + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{arch}:{ver}" if arch else ver) + if parts: + return "|".join(sorted(parts)) + v = entry.get("version") + if v: + return str(v) + return None + + +def _pkg_version_display(entry: Dict[str, Any]) -> Optional[str]: + v = entry.get("version") + if v: + return str(v) + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{ver} ({arch})" if arch else ver) + if parts: + return ", ".join(sorted(parts)) + return None def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: out: Dict[str, Dict[str, Any]] = {} - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: unit = s.get("unit") if unit: out[str(unit)] = s @@ -145,7 +189,7 @@ def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - users = (state.get("users") or {}).get("users") or [] + users = (_roles(state).get("users") or {}).get("users") or [] out: Dict[str, Dict[str, Any]] = {} for u in users: name = u.get("name") @@ -167,43 +211,43 @@ class FileRec: def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: # Services - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: role = s.get("role_name") or "unknown" for mf in s.get("managed_files", []) or []: yield str(role), mf # Package roles - for p in state.get("package_roles", []) or []: + for p in _roles(state).get("packages") or []: role = p.get("role_name") or "unknown" for mf in p.get("managed_files", []) or []: yield str(role), mf # Users - u = state.get("users") or {} + u = _roles(state).get("users") or {} u_role = u.get("role_name") or "users" for mf in u.get("managed_files", []) or []: yield str(u_role), mf # apt_config - ac = state.get("apt_config") or {} + ac = _roles(state).get("apt_config") or {} ac_role = ac.get("role_name") or "apt_config" for mf in ac.get("managed_files", []) or []: yield str(ac_role), mf # etc_custom - ec = state.get("etc_custom") or {} + ec = _roles(state).get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf # usr_local_custom - ul = state.get("usr_local_custom") or {} + ul = _roles(state).get("usr_local_custom") or {} ul_role = ul.get("role_name") or "usr_local_custom" for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf # extra_paths - xp = state.get("extra_paths") or {} + xp = _roles(state).get("extra_paths") or {} xp_role = xp.get("role_name") or "extra_paths" for mf in xp.get("managed_files", []) or []: yield str(xp_role), mf @@ -261,12 +305,28 @@ def compare_harvests( old_state = _load_state(old_b.dir) new_state = _load_state(new_b.dir) - old_pkgs = set(_all_packages(old_state)) - new_pkgs = set(_all_packages(new_state)) + old_inv = _packages_inventory(old_state) + new_inv = _packages_inventory(new_state) + + old_pkgs = set(old_inv.keys()) + new_pkgs = set(new_inv.keys()) pkgs_added = sorted(new_pkgs - old_pkgs) pkgs_removed = sorted(old_pkgs - new_pkgs) + pkgs_version_changed: List[Dict[str, Any]] = [] + for pkg in sorted(old_pkgs & new_pkgs): + a = old_inv.get(pkg) or {} + b = new_inv.get(pkg) or {} + if _pkg_version_key(a) != _pkg_version_key(b): + pkgs_version_changed.append( + { + "package": pkg, + "old": _pkg_version_display(a), + "new": _pkg_version_display(b), + } + ) + old_units = _service_units(old_state) new_units = _service_units(new_state) units_added = sorted(set(new_units) - set(old_units)) @@ -380,6 +440,7 @@ def compare_harvests( [ pkgs_added, pkgs_removed, + pkgs_version_changed, units_added, units_removed, units_changed, @@ -413,7 +474,11 @@ def compare_harvests( "state_mtime": _mtime_iso(new_b.state_path), "host": (new_state.get("host") or {}).get("hostname"), }, - "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "packages": { + "added": pkgs_added, + "removed": pkgs_removed, + "version_changed": pkgs_version_changed, + }, "services": { "enabled_added": units_added, "enabled_removed": units_removed, @@ -471,10 +536,13 @@ def _report_text(report: Dict[str, Any]) -> str: lines.append("\nPackages") lines.append(f" added: {len(pk.get('added', []) or [])}") lines.append(f" removed: {len(pk.get('removed', []) or [])}") + lines.append(f" version_changed: {len(pk.get('version_changed', []) or [])}") for p in pk.get("added", []) or []: lines.append(f" + {p}") for p in pk.get("removed", []) or []: lines.append(f" - {p}") + for ch in pk.get("version_changed", []) or []: + lines.append(f" ~ {ch.get('package')}: {ch.get('old')} -> {ch.get('new')}") sv = report.get("services", {}) lines.append("\nServices (enabled systemd units)") @@ -542,6 +610,7 @@ def _report_text(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), @@ -578,6 +647,12 @@ def _report_markdown(report: Dict[str, Any]) -> str: for p in pk.get("removed", []) or []: out.append(f" - `- {p}`\n") + out.append(f"- Version changed: {len(pk.get('version_changed', []) or [])}\n") + for ch in pk.get("version_changed", []) or []: + out.append( + f" - `~ {ch.get('package')}`: `{ch.get('old')}` → `{ch.get('new')}`\n" + ) + sv = report.get("services", {}) out.append("## Services (enabled systemd units)\n") if sv.get("enabled_added"): @@ -672,6 +747,7 @@ def _report_markdown(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), diff --git a/enroll/harvest.py b/enroll/harvest.py index bb706b1..4ca3984 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -5,6 +5,7 @@ import json import os import re import shutil +import time from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set @@ -1481,9 +1482,60 @@ def harvest( notes=extra_notes, ) + # ------------------------- + # Inventory: packages (SBOM-ish) + # ------------------------- + installed = backend.installed_packages() or {} + + manual_set: Set[str] = set(manual_pkgs or []) + + pkg_units: Dict[str, Set[str]] = {} + pkg_roles_map: Dict[str, Set[str]] = {} + + for svc in service_snaps: + for p in svc.packages: + pkg_units.setdefault(p, set()).add(svc.unit) + pkg_roles_map.setdefault(p, set()).add(svc.role_name) + + pkg_role_names: Dict[str, List[str]] = {} + for ps in pkg_snaps: + pkg_roles_map.setdefault(ps.package, set()).add(ps.role_name) + pkg_role_names.setdefault(ps.package, []).append(ps.role_name) + + pkg_names: Set[str] = set() + pkg_names |= manual_set + pkg_names |= set(pkg_units.keys()) + pkg_names |= {ps.package for ps in pkg_snaps} + + packages_inventory: Dict[str, Dict[str, object]] = {} + for pkg in sorted(pkg_names): + installs = installed.get(pkg, []) or [] + arches = sorted({i.get("arch") for i in installs if i.get("arch")}) + vers = sorted({i.get("version") for i in installs if i.get("version")}) + version: Optional[str] = vers[0] if len(vers) == 1 else None + + observed: List[Dict[str, str]] = [] + if pkg in manual_set: + observed.append({"kind": "user_installed"}) + for unit in sorted(pkg_units.get(pkg, set())): + observed.append({"kind": "systemd_unit", "ref": unit}) + for rn in sorted(set(pkg_role_names.get(pkg, []))): + observed.append({"kind": "package_role", "ref": rn}) + + roles = sorted(pkg_roles_map.get(pkg, set())) + + packages_inventory[pkg] = { + "version": version, + "arches": arches, + "installations": installs, + "observed_via": observed, + "roles": roles, + } + state = { "enroll": { "version": get_enroll_version(), + "harvest_time": time.time_ns(), }, "host": { "hostname": os.uname().nodename, @@ -1491,16 +1543,19 @@ def harvest( "pkg_backend": backend.name, "os_release": platform.os_release, }, - "users": asdict(users_snapshot), - "services": [asdict(s) for s in service_snaps], - "manual_packages": manual_pkgs, - "manual_packages_skipped": manual_pkgs_skipped, - "package_roles": [asdict(p) for p in pkg_snaps], - "apt_config": asdict(apt_config_snapshot), - "dnf_config": asdict(dnf_config_snapshot), - "etc_custom": asdict(etc_custom_snapshot), - "usr_local_custom": asdict(usr_local_custom_snapshot), - "extra_paths": asdict(extra_paths_snapshot), + "inventory": { + "packages": packages_inventory, + }, + "roles": { + "users": asdict(users_snapshot), + "services": [asdict(s) for s in service_snaps], + "packages": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), + "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), + }, } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 923040f..8b4008b 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -271,9 +271,7 @@ def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any]) merged = _merge_mappings_overwrite(existing_map, data) - out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( - merged, sort_keys=True - ) + out = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) with open(path, "w", encoding="utf-8") as f: f.write(out) @@ -392,7 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll + return f""" - name: Deploy any systemd unit files (templates) ansible.builtin.template: @@ -477,7 +475,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. """ - return f"""# Generated by enroll + return f""" - name: Install packages for {role} (APT) ansible.builtin.apt: @@ -672,14 +670,16 @@ def _manifest_from_bundle_dir( with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) - services: List[Dict[str, Any]] = state.get("services", []) - package_roles: List[Dict[str, Any]] = state.get("package_roles", []) - users_snapshot: Dict[str, Any] = state.get("users", {}) - apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) - dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) - etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) - usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) - extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) + roles: Dict[str, Any] = state.get("roles") or {} + + services: List[Dict[str, Any]] = roles.get("services", []) + package_roles: List[Dict[str, Any]] = roles.get("packages", []) + users_snapshot: Dict[str, Any] = roles.get("users", {}) + apt_config_snapshot: Dict[str, Any] = roles.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = roles.get("dnf_config", {}) + etc_custom_snapshot: Dict[str, Any] = roles.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = roles.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = roles.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -839,7 +839,6 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll - name: Ensure groups exist ansible.builtin.group: diff --git a/enroll/platform.py b/enroll/platform.py index 998b83d..3c1904b 100644 --- a/enroll/platform.py +++ b/enroll/platform.py @@ -81,6 +81,17 @@ class PackageBackend: def list_manual_packages(self) -> List[str]: # pragma: no cover raise NotImplementedError + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: # pragma: no cover + """Return mapping of package name -> installed instances. + + Each instance is a dict with at least: + - version: package version string + - arch: architecture string + + Backends should be best-effort and return an empty mapping on failure. + """ + raise NotImplementedError + def build_etc_index( self, ) -> Tuple[ @@ -121,6 +132,11 @@ class DpkgBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .debian import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .debian import build_dpkg_etc_index @@ -194,6 +210,11 @@ class RpmBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .rpm import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .rpm import build_rpm_etc_index diff --git a/enroll/rpm.py b/enroll/rpm.py index 947617c..9e2892f 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -142,6 +142,63 @@ def list_manual_packages() -> List[str]: return [] +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses `rpm -qa` and is expected to work on RHEL/Fedora-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + + The version string is formatted as: + - "-" for typical packages + - ":-" if a non-zero epoch is present + """ + + try: + _, out = _run( + [ + "rpm", + "-qa", + "--qf", + "%{NAME}\t%{EPOCHNUM}\t%{VERSION}\t%{RELEASE}\t%{ARCH}\n", + ], + allow_fail=False, + merge_err=True, + ) + except Exception: + return {} + + pkgs: Dict[str, List[Dict[str, str]]] = {} + for raw in (out or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 5: + continue + name, epoch, ver, rel, arch = [p.strip() for p in parts[:5]] + if not name or not ver: + continue + + # Normalise epoch. + epoch = epoch.strip() + if epoch.lower() in ("(none)", "none", ""): + epoch = "0" + + v = f"{ver}-{rel}" if rel else ver + if epoch and epoch.isdigit() and epoch != "0": + v = f"{epoch}:{v}" + + pkgs.setdefault(name, []).append({"version": v, "arch": arch}) + + for k in list(pkgs.keys()): + pkgs[k] = sorted( + pkgs[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return pkgs + + def _walk_etc_files() -> List[str]: out: List[str] = [] for dirpath, _, filenames in os.walk("/etc"):