Refactor state structure and capture versions of packages

This commit is contained in:
Miguel Jacq 2025-12-29 16:10:27 +11:00
parent 984b0fa81b
commit 043802e800
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
6 changed files with 294 additions and 42 deletions

View file

@ -63,6 +63,50 @@ def list_manual_packages() -> List[str]:
return sorted(set(pkgs)) return sorted(set(pkgs))
def list_installed_packages() -> Dict[str, List[Dict[str, str]]]:
    """Return mapping of installed package name -> installed instances.

    Uses ``dpkg-query -W`` and is expected to work on Debian/Ubuntu-like
    systems. A package name can map to several instances (for example one
    per architecture on multi-arch hosts).

    ``dpkg-query -W`` without a pattern also lists packages that were
    removed but not purged (state ``config-files``). Those are not installed,
    so the ``${Status}`` field is requested as well and such rows are skipped.

    Returns:
        ``{"pkg": [{"version": "...", "arch": "..."}, ...], ...}`` with each
        instance list sorted by ``(arch, version)`` so JSON dumps are
        deterministic. An empty mapping is returned when ``dpkg-query``
        cannot be executed.
    """
    # Final word of ${Status} for rows that do not describe an installed package.
    not_installed_states = ("config-files", "not-installed")

    try:
        p = subprocess.run(
            [
                "dpkg-query",
                "-W",
                "-f=${Package}\t${Version}\t${Architecture}\t${Status}\n",
            ],
            text=True,
            capture_output=True,
            check=False,
        )  # nosec
    except Exception:
        # Deliberately best-effort: a missing or broken dpkg-query must not
        # abort the caller, it just yields no inventory.
        return {}

    out: Dict[str, List[Dict[str, str]]] = {}
    for line in (p.stdout or "").splitlines():
        parts = line.split("\t")
        if len(parts) < 3:
            # Blank or malformed row.
            continue
        name, ver, arch = (field.strip() for field in parts[:3])
        if not name:
            continue
        # ${Status} looks like "install ok installed"; the last word is the
        # actual package state. A missing status is treated as installed.
        status_words = parts[3].split() if len(parts) > 3 else []
        if status_words and status_words[-1] in not_installed_states:
            continue
        out.setdefault(name, []).append({"version": ver, "arch": arch})

    # Stable ordering for deterministic JSON dumps.
    for instances in out.values():
        instances.sort(key=lambda inst: (inst["arch"], inst["version"]))
    return out
def build_dpkg_etc_index( def build_dpkg_etc_index(
info_dir: str = "/var/lib/dpkg/info", info_dir: str = "/var/lib/dpkg/info",
) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]: ) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]:

View file

@ -126,18 +126,62 @@ def _load_state(bundle_dir: Path) -> Dict[str, Any]:
return json.load(f) return json.load(f)
def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]:
return (state.get("inventory") or {}).get("packages") or {}
def _all_packages(state: Dict[str, Any]) -> List[str]: def _all_packages(state: Dict[str, Any]) -> List[str]:
pkgs = set(state.get("manual_packages", []) or []) return sorted(_packages_inventory(state).keys())
pkgs |= set(state.get("manual_packages_skipped", []) or [])
for s in state.get("services", []) or []:
for p in s.get("packages", []) or []: def _roles(state: Dict[str, Any]) -> Dict[str, Any]:
pkgs.add(p) return state.get("roles") or {}
return sorted(pkgs)
def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]:
"""Return a stable string used for version comparison."""
installs = entry.get("installations") or []
if isinstance(installs, list) and installs:
parts: List[str] = []
for inst in installs:
if not isinstance(inst, dict):
continue
arch = str(inst.get("arch") or "")
ver = str(inst.get("version") or "")
if not ver:
continue
parts.append(f"{arch}:{ver}" if arch else ver)
if parts:
return "|".join(sorted(parts))
v = entry.get("version")
if v:
return str(v)
return None
def _pkg_version_display(entry: Dict[str, Any]) -> Optional[str]:
v = entry.get("version")
if v:
return str(v)
installs = entry.get("installations") or []
if isinstance(installs, list) and installs:
parts: List[str] = []
for inst in installs:
if not isinstance(inst, dict):
continue
arch = str(inst.get("arch") or "")
ver = str(inst.get("version") or "")
if not ver:
continue
parts.append(f"{ver} ({arch})" if arch else ver)
if parts:
return ", ".join(sorted(parts))
return None
def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
out: Dict[str, Dict[str, Any]] = {} out: Dict[str, Dict[str, Any]] = {}
for s in state.get("services", []) or []: for s in _roles(state).get("services") or []:
unit = s.get("unit") unit = s.get("unit")
if unit: if unit:
out[str(unit)] = s out[str(unit)] = s
@ -145,7 +189,7 @@ def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
users = (state.get("users") or {}).get("users") or [] users = (_roles(state).get("users") or {}).get("users") or []
out: Dict[str, Dict[str, Any]] = {} out: Dict[str, Dict[str, Any]] = {}
for u in users: for u in users:
name = u.get("name") name = u.get("name")
@ -167,43 +211,43 @@ class FileRec:
def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]:
# Services # Services
for s in state.get("services", []) or []: for s in _roles(state).get("services") or []:
role = s.get("role_name") or "unknown" role = s.get("role_name") or "unknown"
for mf in s.get("managed_files", []) or []: for mf in s.get("managed_files", []) or []:
yield str(role), mf yield str(role), mf
# Package roles # Package roles
for p in state.get("package_roles", []) or []: for p in _roles(state).get("packages") or []:
role = p.get("role_name") or "unknown" role = p.get("role_name") or "unknown"
for mf in p.get("managed_files", []) or []: for mf in p.get("managed_files", []) or []:
yield str(role), mf yield str(role), mf
# Users # Users
u = state.get("users") or {} u = _roles(state).get("users") or {}
u_role = u.get("role_name") or "users" u_role = u.get("role_name") or "users"
for mf in u.get("managed_files", []) or []: for mf in u.get("managed_files", []) or []:
yield str(u_role), mf yield str(u_role), mf
# apt_config # apt_config
ac = state.get("apt_config") or {} ac = _roles(state).get("apt_config") or {}
ac_role = ac.get("role_name") or "apt_config" ac_role = ac.get("role_name") or "apt_config"
for mf in ac.get("managed_files", []) or []: for mf in ac.get("managed_files", []) or []:
yield str(ac_role), mf yield str(ac_role), mf
# etc_custom # etc_custom
ec = state.get("etc_custom") or {} ec = _roles(state).get("etc_custom") or {}
ec_role = ec.get("role_name") or "etc_custom" ec_role = ec.get("role_name") or "etc_custom"
for mf in ec.get("managed_files", []) or []: for mf in ec.get("managed_files", []) or []:
yield str(ec_role), mf yield str(ec_role), mf
# usr_local_custom # usr_local_custom
ul = state.get("usr_local_custom") or {} ul = _roles(state).get("usr_local_custom") or {}
ul_role = ul.get("role_name") or "usr_local_custom" ul_role = ul.get("role_name") or "usr_local_custom"
for mf in ul.get("managed_files", []) or []: for mf in ul.get("managed_files", []) or []:
yield str(ul_role), mf yield str(ul_role), mf
# extra_paths # extra_paths
xp = state.get("extra_paths") or {} xp = _roles(state).get("extra_paths") or {}
xp_role = xp.get("role_name") or "extra_paths" xp_role = xp.get("role_name") or "extra_paths"
for mf in xp.get("managed_files", []) or []: for mf in xp.get("managed_files", []) or []:
yield str(xp_role), mf yield str(xp_role), mf
@ -261,12 +305,28 @@ def compare_harvests(
old_state = _load_state(old_b.dir) old_state = _load_state(old_b.dir)
new_state = _load_state(new_b.dir) new_state = _load_state(new_b.dir)
old_pkgs = set(_all_packages(old_state)) old_inv = _packages_inventory(old_state)
new_pkgs = set(_all_packages(new_state)) new_inv = _packages_inventory(new_state)
old_pkgs = set(old_inv.keys())
new_pkgs = set(new_inv.keys())
pkgs_added = sorted(new_pkgs - old_pkgs) pkgs_added = sorted(new_pkgs - old_pkgs)
pkgs_removed = sorted(old_pkgs - new_pkgs) pkgs_removed = sorted(old_pkgs - new_pkgs)
pkgs_version_changed: List[Dict[str, Any]] = []
for pkg in sorted(old_pkgs & new_pkgs):
a = old_inv.get(pkg) or {}
b = new_inv.get(pkg) or {}
if _pkg_version_key(a) != _pkg_version_key(b):
pkgs_version_changed.append(
{
"package": pkg,
"old": _pkg_version_display(a),
"new": _pkg_version_display(b),
}
)
old_units = _service_units(old_state) old_units = _service_units(old_state)
new_units = _service_units(new_state) new_units = _service_units(new_state)
units_added = sorted(set(new_units) - set(old_units)) units_added = sorted(set(new_units) - set(old_units))
@ -380,6 +440,7 @@ def compare_harvests(
[ [
pkgs_added, pkgs_added,
pkgs_removed, pkgs_removed,
pkgs_version_changed,
units_added, units_added,
units_removed, units_removed,
units_changed, units_changed,
@ -413,7 +474,11 @@ def compare_harvests(
"state_mtime": _mtime_iso(new_b.state_path), "state_mtime": _mtime_iso(new_b.state_path),
"host": (new_state.get("host") or {}).get("hostname"), "host": (new_state.get("host") or {}).get("hostname"),
}, },
"packages": {"added": pkgs_added, "removed": pkgs_removed}, "packages": {
"added": pkgs_added,
"removed": pkgs_removed,
"version_changed": pkgs_version_changed,
},
"services": { "services": {
"enabled_added": units_added, "enabled_added": units_added,
"enabled_removed": units_removed, "enabled_removed": units_removed,
@ -471,10 +536,13 @@ def _report_text(report: Dict[str, Any]) -> str:
lines.append("\nPackages") lines.append("\nPackages")
lines.append(f" added: {len(pk.get('added', []) or [])}") lines.append(f" added: {len(pk.get('added', []) or [])}")
lines.append(f" removed: {len(pk.get('removed', []) or [])}") lines.append(f" removed: {len(pk.get('removed', []) or [])}")
lines.append(f" version_changed: {len(pk.get('version_changed', []) or [])}")
for p in pk.get("added", []) or []: for p in pk.get("added", []) or []:
lines.append(f" + {p}") lines.append(f" + {p}")
for p in pk.get("removed", []) or []: for p in pk.get("removed", []) or []:
lines.append(f" - {p}") lines.append(f" - {p}")
for ch in pk.get("version_changed", []) or []:
lines.append(f" ~ {ch.get('package')}: {ch.get('old')} -> {ch.get('new')}")
sv = report.get("services", {}) sv = report.get("services", {})
lines.append("\nServices (enabled systemd units)") lines.append("\nServices (enabled systemd units)")
@ -542,6 +610,7 @@ def _report_text(report: Dict[str, Any]) -> str:
[ [
(pk.get("added") or []), (pk.get("added") or []),
(pk.get("removed") or []), (pk.get("removed") or []),
(pk.get("version_changed") or []),
(sv.get("enabled_added") or []), (sv.get("enabled_added") or []),
(sv.get("enabled_removed") or []), (sv.get("enabled_removed") or []),
(sv.get("changed") or []), (sv.get("changed") or []),
@ -578,6 +647,12 @@ def _report_markdown(report: Dict[str, Any]) -> str:
for p in pk.get("removed", []) or []: for p in pk.get("removed", []) or []:
out.append(f" - `- {p}`\n") out.append(f" - `- {p}`\n")
out.append(f"- Version changed: {len(pk.get('version_changed', []) or [])}\n")
for ch in pk.get("version_changed", []) or []:
out.append(
f" - `~ {ch.get('package')}`: `{ch.get('old')}` → `{ch.get('new')}`\n"
)
sv = report.get("services", {}) sv = report.get("services", {})
out.append("## Services (enabled systemd units)\n") out.append("## Services (enabled systemd units)\n")
if sv.get("enabled_added"): if sv.get("enabled_added"):
@ -672,6 +747,7 @@ def _report_markdown(report: Dict[str, Any]) -> str:
[ [
(pk.get("added") or []), (pk.get("added") or []),
(pk.get("removed") or []), (pk.get("removed") or []),
(pk.get("version_changed") or []),
(sv.get("enabled_added") or []), (sv.get("enabled_added") or []),
(sv.get("enabled_removed") or []), (sv.get("enabled_removed") or []),
(sv.get("changed") or []), (sv.get("changed") or []),

View file

@ -5,6 +5,7 @@ import json
import os import os
import re import re
import shutil import shutil
import time
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Set from typing import Dict, List, Optional, Set
@ -1481,9 +1482,60 @@ def harvest(
notes=extra_notes, notes=extra_notes,
) )
# -------------------------
# Inventory: packages (SBOM-ish)
# -------------------------
installed = backend.installed_packages() or {}
manual_set: Set[str] = set(manual_pkgs or [])
pkg_units: Dict[str, Set[str]] = {}
pkg_roles_map: Dict[str, Set[str]] = {}
for svc in service_snaps:
for p in svc.packages:
pkg_units.setdefault(p, set()).add(svc.unit)
pkg_roles_map.setdefault(p, set()).add(svc.role_name)
pkg_role_names: Dict[str, List[str]] = {}
for ps in pkg_snaps:
pkg_roles_map.setdefault(ps.package, set()).add(ps.role_name)
pkg_role_names.setdefault(ps.package, []).append(ps.role_name)
pkg_names: Set[str] = set()
pkg_names |= manual_set
pkg_names |= set(pkg_units.keys())
pkg_names |= {ps.package for ps in pkg_snaps}
packages_inventory: Dict[str, Dict[str, object]] = {}
for pkg in sorted(pkg_names):
installs = installed.get(pkg, []) or []
arches = sorted({i.get("arch") for i in installs if i.get("arch")})
vers = sorted({i.get("version") for i in installs if i.get("version")})
version: Optional[str] = vers[0] if len(vers) == 1 else None
observed: List[Dict[str, str]] = []
if pkg in manual_set:
observed.append({"kind": "user_installed"})
for unit in sorted(pkg_units.get(pkg, set())):
observed.append({"kind": "systemd_unit", "ref": unit})
for rn in sorted(set(pkg_role_names.get(pkg, []))):
observed.append({"kind": "package_role", "ref": rn})
roles = sorted(pkg_roles_map.get(pkg, set()))
packages_inventory[pkg] = {
"version": version,
"arches": arches,
"installations": installs,
"observed_via": observed,
"roles": roles,
}
state = { state = {
"enroll": { "enroll": {
"version": get_enroll_version(), "version": get_enroll_version(),
"harvest_time": time.time_ns(),
}, },
"host": { "host": {
"hostname": os.uname().nodename, "hostname": os.uname().nodename,
@ -1491,16 +1543,19 @@ def harvest(
"pkg_backend": backend.name, "pkg_backend": backend.name,
"os_release": platform.os_release, "os_release": platform.os_release,
}, },
"inventory": {
"packages": packages_inventory,
},
"roles": {
"users": asdict(users_snapshot), "users": asdict(users_snapshot),
"services": [asdict(s) for s in service_snaps], "services": [asdict(s) for s in service_snaps],
"manual_packages": manual_pkgs, "packages": [asdict(p) for p in pkg_snaps],
"manual_packages_skipped": manual_pkgs_skipped,
"package_roles": [asdict(p) for p in pkg_snaps],
"apt_config": asdict(apt_config_snapshot), "apt_config": asdict(apt_config_snapshot),
"dnf_config": asdict(dnf_config_snapshot), "dnf_config": asdict(dnf_config_snapshot),
"etc_custom": asdict(etc_custom_snapshot), "etc_custom": asdict(etc_custom_snapshot),
"usr_local_custom": asdict(usr_local_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot),
"extra_paths": asdict(extra_paths_snapshot), "extra_paths": asdict(extra_paths_snapshot),
},
} }
state_path = os.path.join(bundle_dir, "state.json") state_path = os.path.join(bundle_dir, "state.json")

View file

@ -271,9 +271,7 @@ def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any])
merged = _merge_mappings_overwrite(existing_map, data) merged = _merge_mappings_overwrite(existing_map, data)
out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( out = "---\n" + _yaml_dump_mapping(merged, sort_keys=True)
merged, sort_keys=True
)
with open(path, "w", encoding="utf-8") as f: with open(path, "w", encoding="utf-8") as f:
f.write(out) f.write(out)
@ -392,7 +390,7 @@ def _render_generic_files_tasks(
# Using first_found makes roles work in both modes: # Using first_found makes roles work in both modes:
# - site-mode: inventory/host_vars/<host>/<role>/.files/... # - site-mode: inventory/host_vars/<host>/<role>/.files/...
# - non-site: roles/<role>/files/... # - non-site: roles/<role>/files/...
return f"""# Generated by enroll return f"""
- name: Deploy any systemd unit files (templates) - name: Deploy any systemd unit files (templates)
ansible.builtin.template: ansible.builtin.template:
@ -477,7 +475,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str:
generic `package` module. This keeps generated roles usable on both generic `package` module. This keeps generated roles usable on both
Debian-like and RPM-like systems. Debian-like and RPM-like systems.
""" """
return f"""# Generated by enroll return f"""
- name: Install packages for {role} (APT) - name: Install packages for {role} (APT)
ansible.builtin.apt: ansible.builtin.apt:
@ -672,14 +670,16 @@ def _manifest_from_bundle_dir(
with open(state_path, "r", encoding="utf-8") as f: with open(state_path, "r", encoding="utf-8") as f:
state = json.load(f) state = json.load(f)
services: List[Dict[str, Any]] = state.get("services", []) roles: Dict[str, Any] = state.get("roles") or {}
package_roles: List[Dict[str, Any]] = state.get("package_roles", [])
users_snapshot: Dict[str, Any] = state.get("users", {}) services: List[Dict[str, Any]] = roles.get("services", [])
apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) package_roles: List[Dict[str, Any]] = roles.get("packages", [])
dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) users_snapshot: Dict[str, Any] = roles.get("users", {})
etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) apt_config_snapshot: Dict[str, Any] = roles.get("apt_config", {})
usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) dnf_config_snapshot: Dict[str, Any] = roles.get("dnf_config", {})
extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) etc_custom_snapshot: Dict[str, Any] = roles.get("etc_custom", {})
usr_local_custom_snapshot: Dict[str, Any] = roles.get("usr_local_custom", {})
extra_paths_snapshot: Dict[str, Any] = roles.get("extra_paths", {})
site_mode = fqdn is not None and fqdn != "" site_mode = fqdn is not None and fqdn != ""
@ -839,7 +839,6 @@ def _manifest_from_bundle_dir(
# tasks (data-driven) # tasks (data-driven)
users_tasks = """--- users_tasks = """---
# Generated by enroll
- name: Ensure groups exist - name: Ensure groups exist
ansible.builtin.group: ansible.builtin.group:

View file

@ -81,6 +81,17 @@ class PackageBackend:
def list_manual_packages(self) -> List[str]: # pragma: no cover def list_manual_packages(self) -> List[str]: # pragma: no cover
raise NotImplementedError raise NotImplementedError
def installed_packages(self) -> Dict[str, List[Dict[str, str]]]:  # pragma: no cover
    """Map each installed package name to its installed instances.

    Every instance is a dict that carries at least two string entries:
      - version: the package version
      - arch: the package architecture

    Concrete backends are expected to be best-effort and to hand back an
    empty mapping on failure. This base implementation always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
def build_etc_index( def build_etc_index(
self, self,
) -> Tuple[ ) -> Tuple[
@ -121,6 +132,11 @@ class DpkgBackend(PackageBackend):
return list_manual_packages() return list_manual_packages()
def installed_packages(self) -> Dict[str, List[Dict[str, str]]]:
    """Return the installed-package mapping produced by the Debian helper."""
    # Imported at call time, like the sibling backend methods.
    from .debian import list_installed_packages as _list_dpkg_installed

    return _list_dpkg_installed()
def build_etc_index(self): def build_etc_index(self):
from .debian import build_dpkg_etc_index from .debian import build_dpkg_etc_index
@ -194,6 +210,11 @@ class RpmBackend(PackageBackend):
return list_manual_packages() return list_manual_packages()
def installed_packages(self) -> Dict[str, List[Dict[str, str]]]:
    """Return the installed-package mapping produced by the RPM helper."""
    # Imported at call time, like the sibling backend methods.
    from .rpm import list_installed_packages as _list_rpm_installed

    return _list_rpm_installed()
def build_etc_index(self): def build_etc_index(self):
from .rpm import build_rpm_etc_index from .rpm import build_rpm_etc_index

View file

@ -142,6 +142,63 @@ def list_manual_packages() -> List[str]:
return [] return []
def list_installed_packages() -> Dict[str, List[Dict[str, str]]]:
    """Return mapping of installed package name -> installed instances.

    Uses ``rpm -qa`` and is expected to work on RHEL/Fedora-like systems.

    The version string is formatted as:
      - "<version>-<release>" for typical packages
      - "<epoch>:<version>-<release>" if a non-zero epoch is present

    rpm prints the literal ``(none)`` for a tag a package does not carry
    (e.g. the arch of ``gpg-pubkey`` pseudo-packages). That placeholder is
    normalised for both epoch and arch, so a missing arch is recorded as
    ``""`` rather than as the string ``"(none)"``.

    Returns:
        ``{"pkg": [{"version": "...", "arch": "..."}, ...], ...}`` with each
        instance list sorted by ``(arch, version)``. An empty mapping is
        returned when the query fails.
    """
    # Spellings rpm (or a wrapper) may use for "tag not set".
    unset_markers = ("(none)", "none", "")

    try:
        _, out = _run(
            [
                "rpm",
                "-qa",
                "--qf",
                "%{NAME}\t%{EPOCHNUM}\t%{VERSION}\t%{RELEASE}\t%{ARCH}\n",
            ],
            allow_fail=False,
            merge_err=True,
        )
    except Exception:
        # Deliberately best-effort: no rpm (or a failing query) means no inventory.
        return {}

    pkgs: Dict[str, List[Dict[str, str]]] = {}
    for line in (out or "").splitlines():
        parts = line.split("\t")
        if len(parts) < 5:
            # Blank lines and any non-record output lack the five fields.
            continue

        name, epoch, ver, rel, arch = (field.strip() for field in parts[:5])
        if not name or not ver:
            continue

        # Normalise "tag not set" placeholders.
        if epoch.lower() in unset_markers:
            epoch = "0"
        if arch.lower() in unset_markers:
            arch = ""

        v = f"{ver}-{rel}" if rel else ver
        if epoch.isdigit() and epoch != "0":
            v = f"{epoch}:{v}"

        pkgs.setdefault(name, []).append({"version": v, "arch": arch})

    # Stable ordering for deterministic JSON dumps.
    for instances in pkgs.values():
        instances.sort(key=lambda inst: (inst["arch"], inst["version"]))
    return pkgs
def _walk_etc_files() -> List[str]: def _walk_etc_files() -> List[str]:
out: List[str] = [] out: List[str] = []
for dirpath, _, filenames in os.walk("/etc"): for dirpath, _, filenames in os.walk("/etc"):