Huge refactor to support extending a generic Config Manager class for different types (Ansible, Puppet... Salt soon?)
Some checks failed
Lint / test (push) Waiting to run
CI / test (push) Has been cancelled

This commit is contained in:
Miguel Jacq 2026-06-17 09:37:32 +10:00
parent 5e6c8e6455
commit de7531424d
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
24 changed files with 5413 additions and 4535 deletions

View file

@ -6,6 +6,7 @@
* Detect active sysctl parameters and write them to a `/etc/sysctl.d/99-enroll.conf` file * Detect active sysctl parameters and write them to a `/etc/sysctl.d/99-enroll.conf` file
* Use `no_log` on systemd unit interrogations to suppress potential sensitive output when applying Ansible * Use `no_log` on systemd unit interrogations to suppress potential sensitive output when applying Ansible
* Support manifesting Puppet code, as well as Ansible! * Support manifesting Puppet code, as well as Ansible!
* A lot of under-the-bonnet refactoring to make it easier to extend to other config managers later, e.g. Salt
# 0.6.0 # 0.6.0

3131
enroll/ansible.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -312,7 +312,7 @@ def _add_common_manifest_args(p: argparse.ArgumentParser) -> None:
"--target", "--target",
choices=["ansible", "puppet"], choices=["ansible", "puppet"],
default="ansible", default="ansible",
help="Manifest target to generate (default: ansible). Puppet output is an initial conservative target.", help="Manifest target to generate (default: ansible).",
) )
p.add_argument( p.add_argument(
"--fqdn", "--fqdn",

299
enroll/cm.py Normal file
View file

@ -0,0 +1,299 @@
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Set
from .state import load_state, state_path, write_state
@dataclass
class CMModule:
    """A target-agnostic bundle of configuration-management resources.

    The model is deliberately minimal: it records the packages, accounts,
    filesystem entries and services that a renderer (Ansible, Puppet, Salt,
    ...) later turns into concrete tasks or resources. How the group is
    named, included and ordered remains the renderer's decision.
    """

    role_name: str
    module_name: str
    packages: Set[str] = field(default_factory=set)
    groups: Set[str] = field(default_factory=set)
    users: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    dirs: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    files: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    links: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    services: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    notes: List[str] = field(default_factory=list)

    def has_resources(self) -> bool:
        """Tell whether any resource collection, or the notes list, is non-empty."""
        collections = (
            self.packages,
            self.groups,
            self.users,
            self.dirs,
            self.files,
            self.links,
            self.services,
            self.notes,
        )
        return any(bool(collection) for collection in collections)

    @staticmethod
    def state_path(bundle_dir: str | Path) -> Path:
        """Give the canonical state.json location inside a harvest bundle."""
        # Inside the method body the name resolves to the module-level helper,
        # not to this static method.
        location = state_path(bundle_dir)
        return location

    @classmethod
    def load_state(cls, bundle_dir: str | Path) -> Dict[str, Any]:
        """Read state.json through the shared bundle state loader."""
        loaded = load_state(bundle_dir)
        return loaded

    @classmethod
    def _load_state(cls, bundle_dir: str | Path) -> Dict[str, Any]:
        """Legacy spelling kept so renderer subclasses keep working."""
        return cls.load_state(bundle_dir)

    @classmethod
    def write_state(
        cls,
        bundle_dir: str | Path,
        state: Mapping[str, Any],
        *,
        indent: int = 2,
        sort_keys: bool = True,
    ) -> Path:
        """Persist state.json through the shared bundle state writer."""
        written = write_state(bundle_dir, state, indent=indent, sort_keys=sort_keys)
        return written

    @staticmethod
    def _snapshot_items(snap: Dict[str, Any], key: str) -> Iterator[Dict[str, Any]]:
        """Yield only the dict entries of ``snap[key]`` when that value is a list."""
        entries = snap.get(key) or []
        if isinstance(entries, list):
            yield from (entry for entry in entries if isinstance(entry, dict))

    @classmethod
    def managed_dirs_from_snapshot(
        cls, snap: Dict[str, Any]
    ) -> Iterator[Dict[str, Any]]:
        """Iterate the dict entries stored under ``managed_dirs``."""
        return cls._snapshot_items(snap, "managed_dirs")

    @classmethod
    def managed_files_from_snapshot(
        cls, snap: Dict[str, Any]
    ) -> Iterator[Dict[str, Any]]:
        """Iterate the dict entries stored under ``managed_files``."""
        return cls._snapshot_items(snap, "managed_files")

    @classmethod
    def managed_links_from_snapshot(
        cls, snap: Dict[str, Any]
    ) -> Iterator[Dict[str, Any]]:
        """Iterate the dict entries stored under ``managed_links``."""
        return cls._snapshot_items(snap, "managed_links")

    def add_managed_dir(
        self,
        path: str,
        *,
        owner: Any = "root",
        group: Any = "root",
        mode: Any = "0755",
        **attrs: Any,
    ) -> None:
        """Register a directory; the first registration for a path wins."""
        if path:
            entry: Dict[str, Any] = {
                "owner": owner or "root",
                "group": group or "root",
                "mode": mode or "0755",
                **attrs,
            }
            self.dirs.setdefault(path, entry)

    def add_managed_file(
        self,
        path: str,
        *,
        owner: Any = "root",
        group: Any = "root",
        mode: Any = "0644",
        **attrs: Any,
    ) -> None:
        """Register a file; the first registration for a path wins."""
        if path:
            entry: Dict[str, Any] = {
                "owner": owner or "root",
                "group": group or "root",
                "mode": mode or "0644",
                **attrs,
            }
            self.files.setdefault(path, entry)

    def add_managed_link(self, path: str, **attrs: Any) -> None:
        """Register a link; empty paths are ignored and the first entry wins."""
        if not path:
            return
        self.links.setdefault(path, attrs)

    def add_snapshot_notes(self, snap: Dict[str, Any]) -> None:
        """Append every snapshot note, stringified, to this module's notes."""
        for note in snap.get("notes", []) or []:
            self.notes.append(str(note))

    def remove_directory_resource_conflicts(self) -> None:
        """Drop directory entries whose path is also a file or a link."""
        for claimed in (self.files, self.links):
            for path in claimed:
                self.dirs.pop(path, None)
def package_section_label(
    package_role: Dict[str, Any], inventory_packages: Dict[str, Any]
) -> str:
    """Return the Debian Section/RPM Group label for a package role.

    Candidates are consulted in this order: the role's own ``section``, the
    inventory entry's ``section`` then ``group``, and finally the ``section``
    and ``group`` of each recorded installation. The first candidate that is
    a non-blank string and is not a placeholder ("(none)", "none",
    "unspecified", case-insensitive) is returned with surrounding whitespace
    stripped.

    Malformed inventory data is tolerated rather than raising: a missing
    inventory mapping, a non-dict inventory entry, or a non-list
    ``installations`` value is treated as absent.

    Args:
        package_role: Role mapping; ``package`` and ``section`` are read.
        inventory_packages: Mapping of package name to inventory entry.

    Returns:
        The chosen label, or ``"misc"`` when no usable candidate exists.
    """
    placeholders = {"(none)", "none", "unspecified"}
    pkg = str(package_role.get("package") or "").strip()

    # State is loaded from JSON on disk, so do not trust its shape.
    inv = (inventory_packages or {}).get(pkg)
    if not isinstance(inv, dict):
        inv = {}
    installations = inv.get("installations")
    if not isinstance(installations, (list, tuple)):
        installations = []

    def _raw_candidates() -> Iterator[Any]:
        # Most specific source first; installations are the last resort.
        yield package_role.get("section")
        yield inv.get("section")
        yield inv.get("group")
        for inst in installations:
            if isinstance(inst, dict):
                yield inst.get("section")
                yield inst.get("group")

    for value in _raw_candidates():
        if not isinstance(value, str):
            continue
        label = value.strip()
        if label and label.lower() not in placeholders:
            return label
    return "misc"
def section_label_for_packages(
    packages: List[str], inventory_packages: Dict[str, Any]
) -> str:
    """Pick one section/group label that represents a list of packages.

    Packages are examined in the order given; the first one whose label is
    something other than ``"misc"`` decides the result. ``"misc"`` is returned
    when the list is empty or no package yields a more specific label.
    """
    labels = (
        package_section_label({"package": name}, inventory_packages)
        for name in (packages or [])
    )
    specific = (label for label in labels if label and label.lower() != "misc")
    return next(specific, "misc")
def role_order_key(role: str) -> tuple[int, str]:
    """Build a sort key of ``(priority, role)`` for ordering roles.

    Roles without an explicit priority get 50, so they sort between the
    package-manager configuration roles and the custom/user/runtime roles;
    ties are broken by the role name itself.
    """
    # Mirrors the ordering of generated Ansible playbooks: package/config
    # scaffolding first, then services/users, then host-specific runtime state.
    ranks = dict(
        apt_config=10,
        dnf_config=11,
        etc_custom=80,
        usr_local_custom=81,
        extra_paths=82,
        users=90,
        sysctl=95,
        firewall_runtime=99,
    )
    try:
        rank = ranks[role]
    except KeyError:
        rank = 50
    return rank, role
def _drop_duplicate_set_items(
    module: CMModule,
    values: Set[str],
    seen: Set[str],
    resource_type: str,
) -> Set[str]:
    """Filter out values that an earlier module already emitted.

    Values are visited in sorted order so the notes come out deterministically.
    New values are returned and added to ``seen`` (mutated in place); each
    duplicate is dropped and recorded as a note on ``module``.
    """
    fresh: Set[str] = set()
    for item in sorted(values):
        if item not in seen:
            fresh.add(item)
            seen.add(item)
        else:
            module.notes.append(
                f"Skipped duplicate {resource_type}[{item}] already emitted earlier in this catalog."
            )
    return fresh
def _drop_duplicate_mapping_items(
    module: CMModule,
    values: Dict[str, Dict[str, Any]],
    seen: Set[str],
    resource_type: str,
    *,
    excluded_titles: Set[str] | None = None,
    excluded_reason: str = "conflicts with another resource",
) -> Dict[str, Dict[str, Any]]:
    """Filter a title -> attributes mapping against exclusions and earlier titles.

    A title listed in ``excluded_titles`` is dropped first (with
    ``excluded_reason`` in the note); otherwise a title already in ``seen`` is
    dropped as a duplicate. Surviving titles keep their insertion order and
    are added to ``seen``, which is mutated in place.
    """
    blocked = excluded_titles if excluded_titles else set()
    survivors: Dict[str, Dict[str, Any]] = {}
    for name, body in values.items():
        if name in blocked:
            note = f"Skipped {resource_type}[{name}]: {excluded_reason}."
        elif name in seen:
            note = f"Skipped duplicate {resource_type}[{name}] already emitted earlier in this catalog."
        else:
            survivors[name] = body
            seen.add(name)
            continue
        module.notes.append(note)
    return survivors
def resolve_catalog_conflicts(modules: Iterable[CMModule]) -> None:
    """De-duplicate resources across modules before any renderer runs.

    Puppet and Salt compile one resource catalog, so the same package,
    service or parent directory cannot appear in more than one module the way
    it can across Ansible roles. The conflicts are settled here, in the shared
    model, instead of pruning renderer output afterwards.

    Modules are processed in the order given and mutated in place: the first
    module to declare a resource keeps it, later ones get a note instead. A
    directory is dropped whenever any module declares a file or link at the
    same path.
    """
    catalog = list(modules)

    # Every path that some module manages as a concrete file or link.
    claimed_paths: Set[str] = set()
    for entry in catalog:
        claimed_paths |= set(entry.files) | set(entry.links)

    packages_done: Set[str] = set()
    groups_done: Set[str] = set()
    users_done: Set[str] = set()
    dirs_done: Set[str] = set()
    files_done: Set[str] = set()
    links_done: Set[str] = set()
    services_done: Set[str] = set()

    for entry in catalog:
        entry.packages = _drop_duplicate_set_items(
            entry, entry.packages, packages_done, "Package"
        )
        entry.groups = _drop_duplicate_set_items(
            entry, entry.groups, groups_done, "Group"
        )
        entry.users = _drop_duplicate_mapping_items(
            entry, entry.users, users_done, "User"
        )
        entry.dirs = _drop_duplicate_mapping_items(
            entry,
            entry.dirs,
            dirs_done,
            "File",
            excluded_titles=claimed_paths,
            excluded_reason="a file or link with the same path is emitted in this catalog",
        )

        # Files and links share one path namespace, so each is checked against
        # the union; the helper only mutates that temporary union, hence the
        # explicit update of the real tracker afterwards.
        taken_for_files = files_done | links_done
        entry.files = _drop_duplicate_mapping_items(
            entry, entry.files, taken_for_files, "File"
        )
        files_done.update(entry.files)

        taken_for_links = links_done | files_done
        entry.links = _drop_duplicate_mapping_items(
            entry, entry.links, taken_for_links, "File"
        )
        links_done.update(entry.links)

        entry.services = _drop_duplicate_mapping_items(
            entry, entry.services, services_done, "Service"
        )

View file

@ -21,6 +21,12 @@ from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple from typing import Any, Dict, Iterable, List, Optional, Tuple
from .remote import _safe_extract_tar from .remote import _safe_extract_tar
from .state import (
inventory_packages_from_state as _packages_inventory,
load_state as _load_state,
roles_from_state as _roles,
state_path,
)
from .pathfilter import PathFilter from .pathfilter import PathFilter
from .sopsutil import decrypt_file_binary_to, require_sops_cmd from .sopsutil import decrypt_file_binary_to, require_sops_cmd
@ -116,7 +122,7 @@ class BundleRef:
@property @property
def state_path(self) -> Path: def state_path(self) -> Path:
return self.dir / "state.json" return state_path(self.dir)
def _bundle_from_input(path: str, *, sops_mode: bool) -> BundleRef: def _bundle_from_input(path: str, *, sops_mode: bool) -> BundleRef:
@ -189,24 +195,10 @@ def _bundle_from_input(path: str, *, sops_mode: bool) -> BundleRef:
) )
def _load_state(bundle_dir: Path) -> Dict[str, Any]:
sp = bundle_dir / "state.json"
with open(sp, "r", encoding="utf-8") as f:
return json.load(f)
def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]:
return (state.get("inventory") or {}).get("packages") or {}
def _all_packages(state: Dict[str, Any]) -> List[str]: def _all_packages(state: Dict[str, Any]) -> List[str]:
return sorted(_packages_inventory(state).keys()) return sorted(_packages_inventory(state).keys())
def _roles(state: Dict[str, Any]) -> Dict[str, Any]:
return state.get("roles") or {}
def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]: def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]:
"""Return a stable string used for version comparison.""" """Return a stable string used for version comparison."""
installs = entry.get("installations") or [] installs = entry.get("installations") or []

View file

@ -5,7 +5,8 @@ from collections import Counter, defaultdict
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Tuple from typing import Any, Dict, Iterable, List, Tuple
from .diff import _bundle_from_input, _load_state # reuse existing bundle handling from .diff import _bundle_from_input # reuse existing bundle handling
from .state import load_state
@dataclass(frozen=True) @dataclass(frozen=True)
@ -289,7 +290,7 @@ def explain_state(
- a SOPS-encrypted bundle (.sops) - a SOPS-encrypted bundle (.sops)
""" """
bundle = _bundle_from_input(harvest, sops_mode=sops_mode) bundle = _bundle_from_input(harvest, sops_mode=sops_mode)
state = _load_state(bundle.dir) state = load_state(bundle.dir)
host = state.get("host") or {} host = state.get("host") or {}
enroll = state.get("enroll") or {} enroll = state.get("enroll") or {}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,27 @@
from .context import HarvestCollector, HarvestContext
from .cron_logrotate import CronLogrotateCollection, CronLogrotateCollector
from .package_manager import (
PackageManagerConfigCollection,
PackageManagerConfigCollector,
)
from .paths import ExtraPathsCollector, UsrLocalCustomCollector
from .runtime import RuntimeStateCollection, RuntimeStateCollector
from .services import ServicePackageCollection, ServicePackageCollector
from .users import UsersCollection, UsersCollector
__all__ = [
"CronLogrotateCollection",
"CronLogrotateCollector",
"ExtraPathsCollector",
"HarvestCollector",
"HarvestContext",
"PackageManagerConfigCollection",
"PackageManagerConfigCollector",
"RuntimeStateCollection",
"RuntimeStateCollector",
"ServicePackageCollection",
"ServicePackageCollector",
"UsersCollection",
"UsersCollector",
"UsrLocalCustomCollector",
]

View file

@ -0,0 +1,32 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Set
from ..ignore import IgnorePolicy
from ..pathfilter import PathFilter
@dataclass
class HarvestContext:
    """Shared context passed to feature collectors.

    One instance is handed to every ``HarvestCollector``; collectors read its
    fields and add to the shared ``captured_global`` set as they capture
    files.
    """

    # Harvest bundle directory; forwarded as ``bundle_dir`` to capture helpers.
    bundle_dir: str
    # Ignore policy; forwarded as ``policy`` and queried for deny reasons.
    policy: IgnorePolicy
    # User include/exclude filter; forwarded as ``path_filter``.
    path_filter: PathFilter
    # NOTE(review): presumably distro/platform facts - confirm against the producer.
    platform: Dict[str, Any]
    # Package backend; collectors read ``backend.name`` and call
    # ``backend.owner_of_path``.
    backend: Any
    # Installed package data keyed by package name.
    installed_pkgs: Dict[str, Any]
    # Names of installed packages, used for membership tests.
    installed_names: Set[str]
    # NOTE(review): presumably /etc paths owned by some package - confirm.
    owned_etc: Set[str]
    # NOTE(review): presumably /etc path -> owning package - confirm.
    etc_owner_map: Dict[str, str]
    # Passed to ``_add_pkgs_from_etc_topdirs`` when attributing packages.
    topdir_to_pkgs: Dict[str, Set[str]]
    # NOTE(review): presumably package -> its /etc paths - confirm.
    pkg_to_etc_paths: Dict[str, List[str]]
    # Shared across collectors; forwarded as ``seen_global`` to capture helpers.
    captured_global: Set[str]
class HarvestCollector:
    """Base class for harvest feature collectors.

    Stores the shared ``HarvestContext`` on ``self.context`` for subclasses.
    """

    def __init__(self, context: HarvestContext) -> None:
        """Keep a reference to the shared harvest context."""
        self.context = context

View file

@ -0,0 +1,159 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import List, Optional, Set
from .. import harvest as h
from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot
from .context import HarvestCollector
def _pick_installed(installed_names: Set[str], candidates: List[str]) -> Optional[str]:
    """Return the first candidate, in list order, that is installed, else None."""
    matches = (name for name in candidates if name in installed_names)
    return next(matches, None)
def _is_cron_path(path: str) -> bool:
    """Tell whether *path* is a cron/anacron config file or spool entry."""
    exact_matches = (
        "/etc/crontab",
        "/etc/anacrontab",
        "/etc/cron.allow",
        "/etc/cron.deny",
    )
    if path in exact_matches:
        return True
    prefixes = (
        "/etc/cron.",
        "/etc/cron.d/",
        "/etc/anacron/",
        "/var/spool/cron/",
        "/var/spool/crontabs/",
        "/var/spool/anacron/",
    )
    return path.startswith(prefixes)
def _is_logrotate_path(path: str) -> bool:
    """Tell whether *path* is logrotate's main config or a logrotate.d entry."""
    if path.startswith("/etc/logrotate.d/"):
        return True
    return path == "/etc/logrotate.conf"
# Path specs scanned, in this order, when building the cron role snapshot.
# Some specs can match the same file; the per-role "seen" set handed to the
# capture helper exists for such repeats.
_CRON_CAPTURE_GLOBS = [
    "/etc/crontab",
    "/etc/cron.d/*",
    "/etc/cron.hourly/*",
    "/etc/cron.daily/*",
    "/etc/cron.weekly/*",
    "/etc/cron.monthly/*",
    "/etc/cron.allow",
    "/etc/cron.deny",
    "/etc/anacrontab",
    "/etc/anacron/*",
    # user crontabs / spool state
    "/var/spool/cron/*",
    "/var/spool/cron/crontabs/*",
    "/var/spool/crontabs/*",
    "/var/spool/anacron/*",
]

# Path specs scanned when building the logrotate role snapshot.
_LOGROTATE_CAPTURE_GLOBS = [
    "/etc/logrotate.conf",
    "/etc/logrotate.d/*",
]
@dataclass
class CronLogrotateCollection:
    """Result of ``CronLogrotateCollector.collect()``.

    Each snapshot is ``None`` when the matching package was not detected.
    """

    # Installed cron implementation package name, or None if none was found.
    cron_pkg: Optional[str]
    # "logrotate" when installed, otherwise None.
    logrotate_pkg: Optional[str]
    # Snapshot for the cron role; None when cron_pkg is None.
    cron_snapshot: Optional[PackageSnapshot]
    # Snapshot for the logrotate role; None when logrotate_pkg is None.
    logrotate_snapshot: Optional[PackageSnapshot]
class CronLogrotateCollector(HarvestCollector):
    """Collect dedicated cron/logrotate package roles before general packages.

    Each role is built only when a matching package is installed. Files are
    discovered from fixed glob lists and captured through the shared capture
    helper, so the usual ignore policy and path filter still apply.
    """

    cron_role_name = "cron"
    logrotate_role_name = "logrotate"

    def collect(self) -> CronLogrotateCollection:
        """Detect the cron and logrotate packages and snapshot each one found.

        Returns:
            A ``CronLogrotateCollection``; a package that is not installed
            yields ``None`` for both its name and its snapshot.
        """
        installed = self.context.installed_names
        # Candidate order matters: the first installed implementation wins.
        cron_pkg = _pick_installed(
            installed,
            ["cron", "cronie", "cronie-anacron", "vixie-cron", "fcron"],
        )
        logrotate_pkg = _pick_installed(installed, ["logrotate"])

        cron_snapshot = self._collect_cron_snapshot(cron_pkg) if cron_pkg else None
        logrotate_snapshot = (
            self._collect_logrotate_snapshot(logrotate_pkg) if logrotate_pkg else None
        )

        return CronLogrotateCollection(
            cron_pkg=cron_pkg,
            logrotate_pkg=logrotate_pkg,
            cron_snapshot=cron_snapshot,
            logrotate_snapshot=logrotate_snapshot,
        )

    def _collect_cron_snapshot(self, cron_pkg: str) -> PackageSnapshot:
        """Snapshot the cron role from ``_CRON_CAPTURE_GLOBS``."""
        return self._collect_glob_snapshot(
            package=cron_pkg,
            role_name=self.cron_role_name,
            specs=_CRON_CAPTURE_GLOBS,
            reason="system_cron",
        )

    def _collect_logrotate_snapshot(self, logrotate_pkg: str) -> PackageSnapshot:
        """Snapshot the logrotate role from ``_LOGROTATE_CAPTURE_GLOBS``."""
        return self._collect_glob_snapshot(
            package=logrotate_pkg,
            role_name=self.logrotate_role_name,
            specs=_LOGROTATE_CAPTURE_GLOBS,
            reason="system_logrotate",
        )

    def _collect_glob_snapshot(
        self,
        *,
        package: str,
        role_name: str,
        specs: List[str],
        reason: str,
    ) -> PackageSnapshot:
        """Capture every regular, non-symlink file matching *specs* into a snapshot.

        Args:
            package: Installed package the snapshot is attributed to.
            role_name: Role the captured files are filed under.
            specs: Path/glob specs to expand, in order.
            reason: Capture reason recorded for each file.

        Returns:
            A ``PackageSnapshot`` with the captured and excluded files.
        """
        managed: List[ManagedFile] = []
        excluded: List[ExcludedFile] = []
        notes: List[str] = []
        # Per-role de-duplication, since specs may match the same file twice.
        seen: Set[str] = set()

        for spec in specs:
            for path in h._iter_matching_files(spec):
                if not os.path.isfile(path) or os.path.islink(path):
                    continue
                h._capture_file(
                    bundle_dir=self.context.bundle_dir,
                    role_name=role_name,
                    abs_path=path,
                    reason=reason,
                    policy=self.context.policy,
                    path_filter=self.context.path_filter,
                    managed_out=managed,
                    excluded_out=excluded,
                    seen_role=seen,
                    seen_global=self.context.captured_global,
                )

        return PackageSnapshot(
            package=package,
            role_name=role_name,
            section=h._package_section_from_installations(
                self.context.installed_pkgs.get(package, [])
            ),
            managed_files=managed,
            excluded=excluded,
            notes=notes,
        )

View file

@ -0,0 +1,81 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Set
from .. import harvest as h
from ..harvest import AptConfigSnapshot, DnfConfigSnapshot, ExcludedFile, ManagedFile
from .context import HarvestCollector, HarvestContext
@dataclass
class PackageManagerConfigCollection:
    """Result of ``PackageManagerConfigCollector.collect()``.

    Both snapshots are always present; the one that does not match the active
    package backend simply carries empty file lists.
    """

    # Snapshot for the "apt_config" role.
    apt_config_snapshot: AptConfigSnapshot
    # Snapshot for the "dnf_config" role.
    dnf_config_snapshot: DnfConfigSnapshot
class PackageManagerConfigCollector(HarvestCollector):
    """Capture APT or DNF configuration files into their dedicated roles."""

    def __init__(
        self, context: HarvestContext, seen_by_role: Dict[str, Set[str]]
    ) -> None:
        """Remember the shared per-role "seen" registry alongside the context."""
        super().__init__(context)
        self.seen_by_role = seen_by_role

    def collect(self) -> PackageManagerConfigCollection:
        """Capture config for the active backend and return both role snapshots.

        Only the role matching ``backend.name`` ("dpkg" or "rpm") is scanned;
        the other snapshot, and both for any other backend, stays empty.
        """
        apt_role_name = "apt_config"
        dnf_role_name = "dnf_config"

        apt_managed: List[ManagedFile] = []
        apt_excluded: List[ExcludedFile] = []
        apt_notes: List[str] = []
        dnf_managed: List[ManagedFile] = []
        dnf_excluded: List[ExcludedFile] = []
        dnf_notes: List[str] = []

        # Choose what to scan once, then run a single capture loop.
        backend_name = self.context.backend.name
        if backend_name == "dpkg":
            plan = (apt_role_name, h._iter_apt_capture_paths, apt_managed, apt_excluded)
        elif backend_name == "rpm":
            plan = (dnf_role_name, h._iter_dnf_capture_paths, dnf_managed, dnf_excluded)
        else:
            plan = None

        if plan is not None:
            role_name, iter_paths, managed_out, excluded_out = plan
            role_seen = self.seen_by_role.setdefault(role_name, set())
            for path, reason in iter_paths():
                h._capture_file(
                    bundle_dir=self.context.bundle_dir,
                    role_name=role_name,
                    abs_path=path,
                    reason=reason,
                    policy=self.context.policy,
                    path_filter=self.context.path_filter,
                    managed_out=managed_out,
                    excluded_out=excluded_out,
                    seen_role=role_seen,
                    seen_global=self.context.captured_global,
                )

        apt_snapshot = AptConfigSnapshot(
            role_name=apt_role_name,
            managed_files=apt_managed,
            excluded=apt_excluded,
            notes=apt_notes,
        )
        dnf_snapshot = DnfConfigSnapshot(
            role_name=dnf_role_name,
            managed_files=dnf_managed,
            excluded=dnf_excluded,
            notes=dnf_notes,
        )
        return PackageManagerConfigCollection(
            apt_config_snapshot=apt_snapshot,
            dnf_config_snapshot=dnf_snapshot,
        )

View file

@ -0,0 +1,247 @@
from __future__ import annotations
import glob
import os
from typing import Dict, List, Optional, Set
from .. import harvest as h
from ..harvest import (
ExcludedFile,
ExtraPathsSnapshot,
ManagedDir,
ManagedFile,
UsrLocalCustomSnapshot,
)
from ..pathfilter import expand_includes
from .context import HarvestCollector, HarvestContext
class UsrLocalCustomCollector(HarvestCollector):
    """Collect selected /usr/local state into the usr_local_custom role.

    Two trees are scanned: every regular file under /usr/local/etc, and only
    executable regular files under /usr/local/bin.
    """

    role_name = "usr_local_custom"

    def __init__(
        self,
        context: HarvestContext,
        seen_by_role: Dict[str, Set[str]],
        already_all: Set[str],
    ) -> None:
        """Store the shared registries and start with empty result lists.

        Args:
            context: Shared harvest context.
            seen_by_role: Per-role "seen" sets shared with other collectors.
            already_all: Paths already captured elsewhere; such paths are
                skipped, and paths captured here are added to it.
        """
        super().__init__(context)
        self.seen_by_role = seen_by_role
        self.already_all = already_all
        self.notes: List[str] = []
        self.excluded: List[ExcludedFile] = []
        self.managed: List[ManagedFile] = []

    def collect(self) -> UsrLocalCustomSnapshot:
        """Scan both /usr/local trees and return the resulting snapshot."""
        self._scan_tree(
            "/usr/local/etc",
            require_executable=False,
            cap=h.MAX_FILES_CAP,
            reason="usr_local_etc_custom",
        )
        self._scan_tree(
            "/usr/local/bin",
            require_executable=True,
            cap=h.MAX_FILES_CAP,
            reason="usr_local_bin_script",
        )
        return UsrLocalCustomSnapshot(
            role_name=self.role_name,
            managed_files=self.managed,
            excluded=self.excluded,
            notes=self.notes,
        )

    def _scan_tree(
        self,
        root: str,
        *,
        require_executable: bool,
        cap: int,
        reason: str,
    ) -> None:
        """Walk *root* and capture eligible regular files, up to *cap* per tree.

        Args:
            root: Directory to walk; the call is a no-op if it does not exist.
            require_executable: When true, skip files without any execute bit.
            cap: Stop after this many files were captured from this tree.
            reason: Capture reason recorded for each file.
        """
        # Counts successful captures only, not files merely visited.
        scanned = 0
        if not os.path.isdir(root):
            return
        role_seen = self.seen_by_role.setdefault(self.role_name, set())
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                path = os.path.join(dirpath, filename)
                if path in self.already_all:
                    continue
                # Symlinks and non-regular files are never captured here.
                if not os.path.isfile(path) or os.path.islink(path):
                    continue
                try:
                    owner, group, mode = h.stat_triplet(path)
                except OSError:
                    self.excluded.append(ExcludedFile(path=path, reason="unreadable"))
                    continue
                if require_executable:
                    try:
                        # mode is parsed as an octal string; 0o111 = any execute bit.
                        if (int(mode, 8) & 0o111) == 0:
                            continue
                    except ValueError:
                        # Unparseable mode: skip the file rather than guess.
                        continue
                if h._capture_file(
                    bundle_dir=self.context.bundle_dir,
                    role_name=self.role_name,
                    abs_path=path,
                    reason=reason,
                    policy=self.context.policy,
                    path_filter=self.context.path_filter,
                    managed_out=self.managed,
                    excluded_out=self.excluded,
                    seen_role=role_seen,
                    seen_global=self.context.captured_global,
                    metadata=(owner, group, mode),
                ):
                    self.already_all.add(path)
                    scanned += 1
                # NOTE(review): original nesting of this check was lost in
                # extraction; for cap >= 1 both placements behave the same.
                if scanned >= cap:
                    self.notes.append(
                        f"Reached file cap ({cap}) while scanning {root}."
                    )
                    return
class ExtraPathsCollector(HarvestCollector):
    """Collect user-requested include/exclude paths into extra_paths.

    Directories matched by include patterns are recorded as managed
    directories; files expanded from the include patterns are captured as
    managed files unless they were already captured elsewhere.
    """

    role_name = "extra_paths"

    def __init__(
        self,
        context: HarvestContext,
        seen_by_role: Dict[str, Set[str]],
        already_all: Set[str],
        *,
        include_paths: Optional[List[str]] = None,
        exclude_paths: Optional[List[str]] = None,
    ) -> None:
        """Store the shared registries and copy the user's pattern lists.

        Args:
            context: Shared harvest context.
            seen_by_role: Per-role "seen" sets shared with other collectors.
            already_all: Paths already captured elsewhere; updated in place.
            include_paths: Raw include patterns; only echoed into the notes and
                snapshot, while matching uses ``context.path_filter``.
            exclude_paths: Raw exclude patterns, echoed the same way.
        """
        super().__init__(context)
        self.seen_by_role = seen_by_role
        self.already_all = already_all
        self.include_specs = list(include_paths or [])
        self.exclude_specs = list(exclude_paths or [])
        self.notes: List[str] = []
        self.excluded: List[ExcludedFile] = []
        self.managed: List[ManagedFile] = []
        self.managed_dirs: List[ManagedDir] = []
        # Directories already considered, so overlapping roots are not repeated.
        self.dir_seen: Set[str] = set()

    def collect(self) -> ExtraPathsSnapshot:
        """Record included directories, capture included files, build the snapshot."""
        self._collect_included_dirs()
        if self.include_specs:
            self.notes.append("User include patterns:")
            self.notes.extend([f"- {p}" for p in self.include_specs])
        if self.exclude_specs:
            self.notes.append("User exclude patterns:")
            self.notes.extend([f"- {p}" for p in self.exclude_specs])
        included_files: List[str] = []
        # File expansion only runs when the user supplied include patterns.
        if self.include_specs:
            files, inc_notes = expand_includes(
                self.context.path_filter.iter_include_patterns(),
                exclude=self.context.path_filter,
                max_files=h.MAX_FILES_CAP,
            )
            included_files = files
            self.notes.extend(inc_notes)
        role_seen = self.seen_by_role.setdefault(self.role_name, set())
        for path in included_files:
            if path in self.already_all:
                continue
            if h._capture_file(
                bundle_dir=self.context.bundle_dir,
                role_name=self.role_name,
                abs_path=path,
                reason="user_include",
                policy=self.context.policy,
                path_filter=self.context.path_filter,
                managed_out=self.managed,
                excluded_out=self.excluded,
                seen_role=role_seen,
                seen_global=self.context.captured_global,
            ):
                self.already_all.add(path)
        return ExtraPathsSnapshot(
            role_name=self.role_name,
            include_patterns=self.include_specs,
            exclude_patterns=self.exclude_specs,
            managed_dirs=self.managed_dirs,
            managed_files=self.managed,
            excluded=self.excluded,
            notes=self.notes,
        )

    def _collect_included_dirs(self) -> None:
        """Walk every real (non-symlink) directory named by an include pattern."""
        for pat in self.context.path_filter.iter_include_patterns():
            if pat.kind == "prefix":
                path = pat.value
                if os.path.isdir(path) and not os.path.islink(path):
                    self._walk_and_capture_dirs(path)
            elif pat.kind == "glob":
                for hit in glob.glob(pat.value, recursive=True):
                    if os.path.isdir(hit) and not os.path.islink(hit):
                        self._walk_and_capture_dirs(hit)

    def _walk_and_capture_dirs(self, root: str) -> None:
        """Record *root* and its subdirectories as managed directories.

        Excluded and symlinked directories are neither recorded nor descended
        into. The walk stops, with a note, once ``h.MAX_FILES_CAP`` directories
        have been recorded in total.
        """
        root = os.path.normpath(root)
        if not root.startswith("/"):
            root = "/" + root
        if not os.path.isdir(root) or os.path.islink(root):
            return
        for dirpath, dirnames, _ in os.walk(root, followlinks=False):
            if len(self.managed_dirs) >= h.MAX_FILES_CAP:
                self.notes.append(
                    f"Reached directory cap ({h.MAX_FILES_CAP}) while scanning {root}."
                )
                return
            dirpath = os.path.normpath(dirpath)
            if not dirpath.startswith("/"):
                dirpath = "/" + dirpath
            # Emptying dirnames in place stops os.walk from descending further.
            if self.context.path_filter.is_excluded(dirpath):
                dirnames[:] = []
                continue
            if os.path.islink(dirpath) or not os.path.isdir(dirpath):
                dirnames[:] = []
                continue
            if dirpath not in self.dir_seen:
                deny = None
                # Prefer a directory-aware policy hook when the policy has one.
                deny_dir = getattr(self.context.policy, "deny_reason_dir", None)
                if callable(deny_dir):
                    deny = deny_dir(dirpath)
                else:
                    deny = self.context.policy.deny_reason(dirpath)
                    # NOTE(review): nesting reconstructed - the file-oriented
                    # check presumably reports these reasons for any directory,
                    # so they are not treated as denials; confirm.
                    if deny in ("not_regular_file", "not_file", "not_regular"):
                        deny = None
                if not deny:
                    try:
                        owner, group, mode = h.stat_triplet(dirpath)
                        self.managed_dirs.append(
                            ManagedDir(
                                path=dirpath,
                                owner=owner,
                                group=group,
                                mode=mode,
                                reason="user_include_dir",
                            )
                        )
                    except OSError:
                        # Best effort: a directory that cannot be stat'ed is
                        # skipped silently.
                        pass
                self.dir_seen.add(dirpath)
            # Keep only subdirectories that are neither symlinks nor excluded.
            pruned: List[str] = []
            for dirname in dirnames:
                path = os.path.join(dirpath, dirname)
                if os.path.islink(path) or self.context.path_filter.is_excluded(path):
                    continue
                pruned.append(dirname)
            dirnames[:] = pruned

View file

@ -0,0 +1,64 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import List, Optional
from .. import harvest as h
from ..harvest import FirewallRuntimeSnapshot, SysctlSnapshot
from .context import HarvestCollector, HarvestContext
@dataclass
class RuntimeStateCollection:
    """Result of ``RuntimeStateCollector.collect()``."""

    # Snapshot for the "firewall_runtime" role (live ipset/iptables state).
    firewall_runtime_snapshot: FirewallRuntimeSnapshot
    # Snapshot for the "sysctl" role (live sysctl state).
    sysctl_snapshot: SysctlSnapshot
class RuntimeStateCollector(HarvestCollector):
    """Gather live firewall and sysctl state for the generated runtime roles.

    The live capture only runs as root (or where ``os.geteuid`` does not
    exist); otherwise placeholder snapshots carrying an explanatory note are
    returned.
    """

    def __init__(
        self,
        context: HarvestContext,
        *,
        persistent_ipset_files: Optional[List[str]] = None,
        persistent_iptables_v4_files: Optional[List[str]] = None,
        persistent_iptables_v6_files: Optional[List[str]] = None,
    ) -> None:
        """Store the context and the persistent firewall file lists."""
        super().__init__(context)
        self.persistent_ipset_files = (
            persistent_ipset_files if persistent_ipset_files else []
        )
        self.persistent_iptables_v4_files = (
            persistent_iptables_v4_files if persistent_iptables_v4_files else []
        )
        self.persistent_iptables_v6_files = (
            persistent_iptables_v6_files if persistent_iptables_v6_files else []
        )

    def collect(self) -> RuntimeStateCollection:
        """Capture firewall and sysctl runtime state, or explain why it was skipped."""
        geteuid = getattr(os, "geteuid", None)
        # Platforms without geteuid are treated as privileged.
        if geteuid is None or geteuid() == 0:
            firewall = h._collect_firewall_runtime_snapshot(
                self.context.bundle_dir,
                persistent_ipset_files=self.persistent_ipset_files,
                persistent_iptables_v4_files=self.persistent_iptables_v4_files,
                persistent_iptables_v6_files=self.persistent_iptables_v6_files,
            )
            sysctl = h._collect_sysctl_snapshot(self.context.bundle_dir)
            return RuntimeStateCollection(
                firewall_runtime_snapshot=firewall,
                sysctl_snapshot=sysctl,
            )

        skipped_firewall = FirewallRuntimeSnapshot(
            role_name="firewall_runtime",
            notes=[
                "Live ipset/iptables runtime capture skipped because harvest "
                "is not running as root."
            ],
        )
        skipped_sysctl = SysctlSnapshot(
            role_name="sysctl",
            notes=[
                "Live sysctl runtime capture skipped because harvest is not "
                "running as root."
            ],
        )
        return RuntimeStateCollection(
            firewall_runtime_snapshot=skipped_firewall,
            sysctl_snapshot=skipped_sysctl,
        )

View file

@ -0,0 +1,525 @@
from __future__ import annotations
import glob
import os
from dataclasses import dataclass
from typing import Dict, List, Optional, Set
from .. import harvest as h
from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot, ServiceSnapshot
from ..systemd import UnitQueryError
from .context import HarvestCollector, HarvestContext
from .cron_logrotate import CronLogrotateCollector, _is_cron_path, _is_logrotate_path
@dataclass
class ServicePackageCollection:
    """Result of ``ServicePackageCollector.collect()``."""

    # One snapshot per enabled service unit that was processed.
    service_snaps: List[ServiceSnapshot]
    # Package snapshots produced by the package collection step.
    pkg_snaps: List[PackageSnapshot]
    # NOTE(review): presumably the manually-installed package names - confirm.
    manual_pkgs: List[str]
    # NOTE(review): presumably packages with nothing to manage beyond
    # installation - confirm.
    simple_packages: List[str]
    # NOTE(review): presumably manual packages left out of pkg_snaps - confirm.
    manual_pkgs_skipped: List[str]
    # Role name -> set of alias names (hints, owning packages, the role itself).
    service_role_aliases: Dict[str, Set[str]]
    # Per-role "seen" path sets, handed on to later collectors.
    seen_by_role: Dict[str, Set[str]]
class ServicePackageCollector(HarvestCollector):
"""Collect service-attributed and manually-installed package snapshots."""
def __init__(
self,
context: HarvestContext,
*,
cron_snapshot: Optional[PackageSnapshot] = None,
logrotate_snapshot: Optional[PackageSnapshot] = None,
cron_pkg: Optional[str] = None,
logrotate_pkg: Optional[str] = None,
) -> None:
super().__init__(context)
self.cron_snapshot = cron_snapshot
self.logrotate_snapshot = logrotate_snapshot
self.cron_pkg = cron_pkg
self.logrotate_pkg = logrotate_pkg
self.service_role_aliases: Dict[str, Set[str]] = {}
self.seen_by_role: Dict[str, Set[str]] = {}
self.managed_by_role: Dict[str, List[ManagedFile]] = {}
self.excluded_by_role: Dict[str, List[ExcludedFile]] = {}
def collect(self) -> ServicePackageCollection:
service_snaps, timer_extra_by_pkg = self._collect_service_snapshots()
pkg_snaps, manual_pkgs, simple_packages, manual_pkgs_skipped = (
self._collect_package_snapshots(
service_snaps,
timer_extra_by_pkg,
)
)
self._capture_common_enabled_symlinks(service_snaps, pkg_snaps)
return ServicePackageCollection(
service_snaps=service_snaps,
pkg_snaps=pkg_snaps,
manual_pkgs=manual_pkgs,
simple_packages=simple_packages,
manual_pkgs_skipped=manual_pkgs_skipped,
service_role_aliases=self.service_role_aliases,
seen_by_role=self.seen_by_role,
)
def _collect_service_snapshots(
    self,
) -> tuple[List[ServiceSnapshot], Dict[str, List[str]]]:
    """Build one ``ServiceSnapshot`` per enabled service unit.

    For every enabled unit this resolves owning packages, gathers candidate
    configuration paths (systemd drop-ins, environment files, modified
    package files, unowned files under hint-derived ``/etc`` roots) and hands
    each candidate to ``h._capture_file``.

    Returns:
        ``(service_snaps, timer_extra_by_pkg)`` where the second element is
        whatever ``_collect_timer_overrides`` returns for these snapshots.
    """
    backend = self.context.backend
    service_snaps: List[ServiceSnapshot] = []
    enabled_services = h.list_enabled_services()
    # When cron/logrotate snapshots were supplied, drop units whose role name
    # matches the dedicated cron/logrotate role names.
    if self.cron_snapshot is not None or self.logrotate_snapshot is not None:
        blocked_roles = set()
        if self.cron_snapshot is not None:
            blocked_roles.add(CronLogrotateCollector.cron_role_name)
        if self.logrotate_snapshot is not None:
            blocked_roles.add(CronLogrotateCollector.logrotate_role_name)
        enabled_services = [
            u
            for u in enabled_services
            if h._role_name_from_unit(u) not in blocked_roles
        ]
    enabled_set = set(enabled_services)

    def service_sort_key(unit: str) -> tuple[int, str, str]:
        # Fewer dashes in the base name sort first, so a unit is visited
        # before any unit whose name extends it with "-suffix".
        base = unit.removesuffix(".service")
        base = base.split("@", 1)[0]
        return (base.count("-"), base.lower(), unit.lower())

    def parent_service_unit(unit: str) -> Optional[str]:
        # Return the longest dash-delimited prefix of this unit's base name
        # that is itself an enabled ".service" unit, or None.
        if not unit.endswith(".service"):
            return None
        base = unit.removesuffix(".service")
        base = base.split("@", 1)[0]
        parts = base.split("-")
        for i in range(len(parts) - 1, 0, -1):
            cand = "-".join(parts[:i]) + ".service"
            if cand in enabled_set:
                return cand
        return None

    # Only units that actually have a parent get an entry.
    parent_unit_for = {
        u: pu for u in enabled_services if (pu := parent_service_unit(u))
    }

    for unit in sorted(enabled_services, key=service_sort_key):
        role = h._role_name_from_unit(unit)
        parent_unit = parent_unit_for.get(unit)
        parent_role = h._role_name_from_unit(parent_unit) if parent_unit else None
        try:
            ui = h.get_unit_info(unit)
        except UnitQueryError as e:
            # The unit could not be queried: still emit a snapshot so the
            # role exists, with every state field empty and the error text
            # recorded as the only note.
            self.service_role_aliases.setdefault(
                role, h._hint_names(unit, set()) | {role}
            )
            self.seen_by_role.setdefault(role, set())
            managed = self.managed_by_role.setdefault(role, [])
            excluded = self.excluded_by_role.setdefault(role, [])
            service_snaps.append(
                ServiceSnapshot(
                    unit=unit,
                    role_name=role,
                    packages=[],
                    active_state=None,
                    sub_state=None,
                    unit_file_state=None,
                    condition_result=None,
                    managed_files=managed,
                    excluded=excluded,
                    notes=[str(e)],
                )
            )
            continue

        pkgs: Set[str] = set()
        notes: List[str] = []
        # These lists live in the per-role maps and are the same objects the
        # snapshot below references, so captures routed to this role later
        # (e.g. from a child unit) land in the same lists.
        excluded = self.excluded_by_role.setdefault(role, [])
        managed = self.managed_by_role.setdefault(role, [])
        # path -> capture reason; setdefault below means the first reason wins.
        candidates: Dict[str, str] = {}

        # Packages owning the unit file and its executables.
        if ui.fragment_path:
            p = backend.owner_of_path(ui.fragment_path)
            if p:
                pkgs.add(p)
        for exe in ui.exec_paths:
            p = backend.owner_of_path(exe)
            if p:
                pkgs.add(p)

        # Drop-ins and environment files are only considered under /etc.
        for pth in ui.dropin_paths:
            if pth.startswith("/etc/"):
                candidates[pth] = "systemd_dropin"
        for env_file in ui.env_files:
            # Strip leading "-" (systemd's "ignore if missing" marker on
            # EnvironmentFile= entries).
            env_file = env_file.lstrip("-")
            if any(ch in env_file for ch in "*?["):
                # Entry contains glob metacharacters: expand it.
                for g in glob.glob(env_file):
                    if g.startswith("/etc/") and os.path.isfile(g):
                        candidates[g] = "systemd_envfile"
            elif env_file.startswith("/etc/") and os.path.isfile(env_file):
                candidates[env_file] = "systemd_envfile"

        hints = h._hint_names(unit, pkgs)
        h._add_pkgs_from_etc_topdirs(hints, self.context.topdir_to_pkgs, pkgs)
        # Aliases are recorded here, before the specific-path loop below may
        # add further packages to ``pkgs``.
        self.service_role_aliases[role] = set(hints) | set(pkgs) | {role}

        for sp in h._maybe_add_specific_paths(hints, backend):
            if not os.path.exists(sp):
                continue
            if sp in self.context.etc_owner_map:
                pkgs.add(self.context.etc_owner_map[sp])
            else:
                candidates.setdefault(sp, "custom_specific_path")

        # Modified package-owned files, minus symlinks/non-files, paths
        # handled by the cron/logrotate snapshots, and package-manager config.
        for pkg in sorted(pkgs):
            etc_paths = self.context.pkg_to_etc_paths.get(pkg, [])
            for path, reason in backend.modified_paths(pkg, etc_paths).items():
                if not os.path.isfile(path) or os.path.islink(path):
                    continue
                if self.cron_snapshot is not None and _is_cron_path(path):
                    continue
                if self.logrotate_snapshot is not None and _is_logrotate_path(path):
                    continue
                if backend.is_pkg_config_path(path):
                    continue
                candidates.setdefault(path, reason)

        # Hint-derived /etc roots: hints listed in h.SHARED_ETC_TOPDIRS are
        # scanned with confish_only=True, all others with confish_only=False.
        any_roots: List[str] = []
        confish_roots: List[str] = []
        for hint in hints:
            roots_for_hint = [f"/etc/{hint}", f"/etc/{hint}.d"]
            if hint in h.SHARED_ETC_TOPDIRS:
                confish_roots.extend(roots_for_hint)
            else:
                any_roots.extend(roots_for_hint)

        found: List[str] = []
        found.extend(
            h._scan_unowned_under_roots(
                any_roots,
                self.context.owned_etc,
                limit=h.MAX_UNOWNED_FILES_PER_ROLE,
                confish_only=False,
            )
        )
        # The second scan only gets whatever is left of the per-role limit.
        if len(found) < h.MAX_UNOWNED_FILES_PER_ROLE:
            found.extend(
                h._scan_unowned_under_roots(
                    confish_roots,
                    self.context.owned_etc,
                    limit=h.MAX_UNOWNED_FILES_PER_ROLE - len(found),
                    confish_only=True,
                )
            )
        for pth in found:
            candidates.setdefault(pth, "custom_unowned")

        if not pkgs and not candidates:
            notes.append(
                "No packages or /etc candidates detected (unexpected for enabled service)."
            )

        for path, reason in sorted(candidates.items()):
            # /etc files of a child unit go to the parent's role, except the
            # unit's own systemd drop-ins and environment files.
            dest_role = role
            if (
                parent_role
                and path.startswith("/etc/")
                and reason not in ("systemd_dropin", "systemd_envfile")
            ):
                dest_role = parent_role
            dest_managed = self.managed_by_role.setdefault(dest_role, [])
            dest_excluded = self.excluded_by_role.setdefault(dest_role, [])
            dest_seen = self.seen_by_role.setdefault(dest_role, set())
            h._capture_file(
                bundle_dir=self.context.bundle_dir,
                role_name=dest_role,
                abs_path=path,
                reason=reason,
                policy=self.context.policy,
                path_filter=self.context.path_filter,
                managed_out=dest_managed,
                excluded_out=dest_excluded,
                seen_role=dest_seen,
                seen_global=self.context.captured_global,
            )

        service_snaps.append(
            ServiceSnapshot(
                unit=unit,
                role_name=role,
                packages=sorted(pkgs),
                active_state=ui.active_state,
                sub_state=ui.sub_state,
                unit_file_state=ui.unit_file_state,
                condition_result=ui.condition_result,
                managed_files=managed,
                excluded=excluded,
                notes=notes,
            )
        )

    timer_extra_by_pkg = self._collect_timer_overrides(service_snaps)
    return service_snaps, timer_extra_by_pkg
def _collect_timer_overrides(
    self,
    service_snaps: List[ServiceSnapshot],
) -> Dict[str, List[str]]:
    """Capture ``/etc`` files that belong to enabled timers.

    For each enabled timer, the regular (non-symlink) files under ``/etc``
    among its unit file, drop-ins and environment files are collected. If the
    timer's trigger unit already has a service snapshot, the files are
    captured straight into that snapshot with reason ``related_timer``.
    Otherwise they are queued against every package found to own the timer's
    unit file or the trigger service's unit file / executables.

    Args:
        service_snaps: Snapshots produced by the service pass; matched
            against each timer's ``trigger_unit``.

    Returns:
        Mapping of package name to the timer file paths queued for it.
    """
    backend = self.context.backend
    timer_extra_by_pkg: Dict[str, List[str]] = {}

    try:
        enabled_timers = h.list_enabled_timers()
    except Exception:
        # Best effort: a failed timer listing must not abort the harvest.
        enabled_timers = []

    service_snap_by_unit = {s.unit: s for s in service_snaps}

    for timer in sorted(enabled_timers):
        try:
            ti = h.get_timer_info(timer)
        except Exception:  # nosec
            continue

        # Environment-file entries may carry systemd's leading "-" ("ignore
        # if missing") marker. The service pass strips it before testing the
        # path, so do the same here; otherwise "-/etc/..." entries can never
        # pass the "/etc/" prefix check below.
        possible_paths = [ti.fragment_path, *ti.dropin_paths]
        possible_paths.extend(env.lstrip("-") for env in ti.env_files if env)

        timer_paths: List[str] = []
        for pth in possible_paths:
            if not pth:
                continue
            if not pth.startswith("/etc/"):
                continue
            if os.path.islink(pth) or not os.path.isfile(pth):
                continue
            timer_paths.append(pth)
        if not timer_paths:
            continue

        snap = (
            service_snap_by_unit.get(ti.trigger_unit) if ti.trigger_unit else None
        )
        if snap is not None:
            # The triggered service already has a role: attach the files there.
            role_seen = self.seen_by_role.setdefault(snap.role_name, set())
            for path in timer_paths:
                h._capture_file(
                    bundle_dir=self.context.bundle_dir,
                    role_name=snap.role_name,
                    abs_path=path,
                    reason="related_timer",
                    policy=self.context.policy,
                    path_filter=self.context.path_filter,
                    managed_out=snap.managed_files,
                    excluded_out=snap.excluded,
                    seen_role=role_seen,
                    seen_global=self.context.captured_global,
                )
            continue

        # No service snapshot: fall back to the owning package(s).
        pkgs: Set[str] = set()
        if ti.fragment_path:
            p = backend.owner_of_path(ti.fragment_path)
            if p:
                pkgs.add(p)
        if ti.trigger_unit and ti.trigger_unit.endswith(".service"):
            try:
                ui = h.get_unit_info(ti.trigger_unit)
                if ui.fragment_path:
                    p = backend.owner_of_path(ui.fragment_path)
                    if p:
                        pkgs.add(p)
                for exe in ui.exec_paths:
                    p = backend.owner_of_path(exe)
                    if p:
                        pkgs.add(p)
            except Exception:  # nosec
                # Best effort: the trigger unit is only an extra ownership hint.
                pass

        for pkg in pkgs:
            timer_extra_by_pkg.setdefault(pkg, []).extend(timer_paths)

    return timer_extra_by_pkg
def _collect_package_snapshots(
    self,
    service_snaps: List[ServiceSnapshot],
    timer_extra_by_pkg: Dict[str, List[str]],
) -> tuple[List[PackageSnapshot], List[str], List[str], List[str]]:
    """Build a ``PackageSnapshot`` for each manual package not already covered.

    Args:
        service_snaps: Service snapshots; any package they list is skipped.
        timer_extra_by_pkg: Per-package timer file paths to capture with
            reason ``related_timer``.

    Returns:
        ``(pkg_snaps, manual_pkgs, simple_packages, manual_pkgs_skipped)``.
        ``pkg_snaps`` starts with the supplied cron/logrotate snapshots (when
        present); ``manual_pkgs`` is the backend's list as returned;
        ``simple_packages`` holds packages for which nothing was captured or
        excluded; ``manual_pkgs_skipped`` holds packages skipped because a
        service, the cron snapshot or the logrotate snapshot covers them.
    """
    backend = self.context.backend
    manual_pkgs = backend.list_manual_packages()

    # Every package already attributed to a service role.
    covered_by_services: Set[str] = set()
    for snap in service_snaps:
        covered_by_services.update(snap.packages)

    manual_pkgs_skipped: List[str] = []
    pkg_snaps: List[PackageSnapshot] = []
    simple_packages: List[str] = []

    # Pre-built cron/logrotate snapshots are passed through unchanged.
    if self.cron_snapshot is not None:
        pkg_snaps.append(self.cron_snapshot)
    if self.logrotate_snapshot is not None:
        pkg_snaps.append(self.logrotate_snapshot)

    for pkg in sorted(manual_pkgs):
        if pkg in covered_by_services:
            manual_pkgs_skipped.append(pkg)
            continue
        if self.cron_snapshot is not None and pkg == self.cron_pkg:
            manual_pkgs_skipped.append(pkg)
            continue
        if self.logrotate_snapshot is not None and pkg == self.logrotate_pkg:
            manual_pkgs_skipped.append(pkg)
            continue

        role = h._role_name_from_pkg(pkg)
        notes: List[str] = []
        excluded: List[ExcludedFile] = []
        managed: List[ManagedFile] = []
        # path -> capture reason; setdefault means the first reason wins, so
        # timer files keep "related_timer" even if also reported as modified.
        candidates: Dict[str, str] = {}

        for tpath in timer_extra_by_pkg.get(pkg, []):
            candidates.setdefault(tpath, "related_timer")

        # Modified package-owned files, with the same filters as the
        # service pass.
        etc_paths = self.context.pkg_to_etc_paths.get(pkg, [])
        for path, reason in backend.modified_paths(pkg, etc_paths).items():
            if not os.path.isfile(path) or os.path.islink(path):
                continue
            if self.cron_snapshot is not None and _is_cron_path(path):
                continue
            if self.logrotate_snapshot is not None and _is_logrotate_path(path):
                continue
            if backend.is_pkg_config_path(path):
                continue
            candidates.setdefault(path, reason)

        # Roots to scan for unowned files: the package's /etc top-level
        # dirs, excluding shared top-level dirs and package-manager config.
        topdirs = h._topdirs_for_package(pkg, self.context.pkg_to_etc_paths)
        roots: List[str] = []
        for topdir in sorted(topdirs):
            if topdir in h.SHARED_ETC_TOPDIRS:
                continue
            if backend.is_pkg_config_path(
                f"/etc/{topdir}/"
            ) or backend.is_pkg_config_path(f"/etc/{topdir}"):
                continue
            roots.extend([f"/etc/{topdir}", f"/etc/{topdir}.d"])
        roots.extend(h._maybe_add_specific_paths(set(topdirs), backend))

        # Directory roots are scanned recursively by the helper ...
        for pth in h._scan_unowned_under_roots(
            [r for r in roots if os.path.isdir(r)],
            self.context.owned_etc,
            confish_only=False,
        ):
            candidates.setdefault(pth, "custom_unowned")
        # ... while roots that are plain files are considered individually.
        for root in roots:
            if os.path.isfile(root) and not os.path.islink(root):
                if root not in self.context.owned_etc and h._is_confish(root):
                    candidates.setdefault(root, "custom_specific_path")

        role_seen = self.seen_by_role.setdefault(role, set())
        for path, reason in sorted(candidates.items()):
            h._capture_file(
                bundle_dir=self.context.bundle_dir,
                role_name=role,
                abs_path=path,
                reason=reason,
                policy=self.context.policy,
                path_filter=self.context.path_filter,
                managed_out=managed,
                excluded_out=excluded,
                seen_role=role_seen,
                seen_global=self.context.captured_global,
            )

        # A package with nothing captured or excluded is a "simple" package.
        has_config = bool(managed or excluded)
        if not has_config:
            notes.append(
                "No changed or custom configuration detected for this package."
            )
            simple_packages.append(pkg)

        pkg_snaps.append(
            PackageSnapshot(
                package=pkg,
                role_name=role,
                section=h._package_section_from_installations(
                    self.context.installed_pkgs.get(pkg, [])
                ),
                managed_files=managed,
                managed_links=[],
                excluded=excluded,
                notes=notes,
                has_config=has_config,
            )
        )

    return pkg_snaps, manual_pkgs, simple_packages, manual_pkgs_skipped
def _find_role_snapshot(
self,
role_name: str,
service_snaps: List[ServiceSnapshot],
pkg_snaps: List[PackageSnapshot],
):
for snap in service_snaps:
if snap.role_name == role_name:
return snap
for snap in pkg_snaps:
if snap.role_name == role_name:
return snap
return None
def _capture_enabled_symlinks_for_role(
self,
role_name: str,
dirs: List[str],
service_snaps: List[ServiceSnapshot],
pkg_snaps: List[PackageSnapshot],
) -> None:
snap = self._find_role_snapshot(role_name, service_snaps, pkg_snaps)
if snap is None:
return
role_seen = self.seen_by_role.setdefault(role_name, set())
for directory in dirs:
if not os.path.isdir(directory):
continue
for pth in sorted(glob.glob(os.path.join(directory, "*"))):
if not os.path.islink(pth):
continue
h._capture_link(
role_name=role_name,
abs_path=pth,
reason="enabled_symlink",
policy=self.context.policy,
path_filter=self.context.path_filter,
managed_out=snap.managed_links,
excluded_out=snap.excluded,
seen_role=role_seen,
seen_global=self.context.captured_global,
)
def _capture_common_enabled_symlinks(
self,
service_snaps: List[ServiceSnapshot],
pkg_snaps: List[PackageSnapshot],
) -> None:
self._capture_enabled_symlinks_for_role(
"nginx",
["/etc/nginx/modules-enabled", "/etc/nginx/sites-enabled"],
service_snaps,
pkg_snaps,
)
self._capture_enabled_symlinks_for_role(
"apache2",
[
"/etc/apache2/conf-enabled",
"/etc/apache2/mods-enabled",
"/etc/apache2/sites-enabled",
],
service_snaps,
pkg_snaps,
)

View file

@ -0,0 +1,167 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Set
from .. import harvest as h
from ..harvest import (
ExcludedFile,
FlatpakSnapshot,
ManagedFile,
SnapSnapshot,
UsersSnapshot,
)
from .context import HarvestCollector, HarvestContext
@dataclass
class UsersCollection:
    """Bundle of the three snapshots returned by ``UsersCollector.collect``."""

    # Users, their captured files, and per-user Flatpak apps/remotes.
    users_snapshot: UsersSnapshot
    # System-wide Flatpak applications and remotes.
    flatpak_snapshot: FlatpakSnapshot
    # System-wide snaps.
    snap_snapshot: SnapSnapshot
class UsersCollector(HarvestCollector):
    """Harvest non-system user accounts plus system/user Flatpak and Snap facts."""

    def __init__(
        self, context: HarvestContext, seen_by_role: Dict[str, Set[str]]
    ) -> None:
        """Store the shared context and the caller-owned ``seen_by_role`` map."""
        super().__init__(context)
        self.seen_by_role = seen_by_role

    def collect(self) -> UsersCollection:
        """Enumerate users, capture their files and gather Flatpak/Snap facts.

        User enumeration failures are recorded as a note rather than raised.
        SSH files reported for each user are always offered for capture;
        shell dotfile capture is enabled only when the policy's ``dangerous``
        attribute is truthy.

        Returns:
            A ``UsersCollection`` with the users, flatpak and snap snapshots.
        """
        ctx = self.context
        notes_for_users: List[str] = []
        excluded_files: List[ExcludedFile] = []
        managed_files: List[ManagedFile] = []
        user_entries: List[dict] = []

        try:
            records = h.collect_non_system_users()
        except Exception as exc:
            records = []
            notes_for_users.append(f"Failed to enumerate users: {exc!r}")

        # System-wide Flatpaks/Snaps and the configured Flatpak remotes.
        from ..accounts import (
            find_system_flatpak_remotes,
            find_system_flatpaks,
            find_system_snaps,
            find_user_flatpak_remotes,
        )

        system_flatpaks = [asdict(item) for item in find_system_flatpaks()]
        system_snaps = [asdict(item) for item in find_system_snaps()]
        system_flatpak_remotes = [
            asdict(item) for item in find_system_flatpak_remotes()
        ]

        flatpak_notes: List[str] = []
        if system_flatpaks:
            flatpak_names = ", ".join(
                str(item.get("name")) for item in system_flatpaks
            )
            flatpak_notes.append("System-wide flatpaks detected: " + flatpak_names)

        snap_notes: List[str] = []
        if system_snaps:
            snap_names = ", ".join(str(item.get("name")) for item in system_snaps)
            snap_notes.append("System-wide snaps detected: " + snap_names)

        role = "users"
        role_seen = self.seen_by_role.setdefault(role, set())
        dotfiles_enabled = bool(getattr(ctx.policy, "dangerous", False))
        if records and not dotfiles_enabled:
            notes_for_users.append(
                "User shell dotfiles were not auto-harvested because --dangerous was not set; "
                "use --dangerous for automatic shell-dotfile capture, or targeted "
                "--include-path patterns for safe-mode review."
            )

        # Keyword arguments shared by every capture call made for the users role.
        capture_common = {
            "bundle_dir": ctx.bundle_dir,
            "role_name": role,
            "policy": ctx.policy,
            "path_filter": ctx.path_filter,
            "managed_out": managed_files,
            "excluded_out": excluded_files,
            "seen_role": role_seen,
            "seen_global": ctx.captured_global,
        }
        exported_fields = (
            "name",
            "uid",
            "gid",
            "gecos",
            "home",
            "shell",
            "primary_group",
            "supplementary_groups",
        )

        flatpaks_by_user: Dict[str, List[Dict[str, Any]]] = {}
        user_remotes: List[Dict[str, Any]] = []

        for user in records:
            user_entries.append(
                {field: getattr(user, field) for field in exported_fields}
            )

            # SSH material listed on the record (authorized_keys and *.pub).
            for ssh_file in user.ssh_files:
                if ssh_file.endswith("/authorized_keys"):
                    reason = "authorized_keys"
                else:
                    reason = "ssh_public_key"
                h._capture_file(abs_path=ssh_file, reason=reason, **capture_common)

            # Shell dotfiles often embed tokens or secrets, so the helper is
            # told whether capture is enabled via the dangerous-mode flag.
            home = (user.home or "").rstrip("/")
            if home and home.startswith("/"):
                h._capture_user_shell_dotfiles(
                    home=home,
                    skel_dir="/etc/skel",
                    enabled=dotfiles_enabled,
                    **capture_common,
                )

            # Per-user Flatpak applications and remotes. Snaps are
            # system-wide; ~/snap/* is user data, not an install source.
            if user.flatpaks:
                flatpaks_by_user[user.name] = [asdict(fp) for fp in user.flatpaks]
            user_remotes.extend(
                asdict(remote)
                for remote in find_user_flatpak_remotes(home, user=user.name)
            )

        users_snapshot = UsersSnapshot(
            role_name="users",
            users=user_entries,
            managed_files=managed_files,
            excluded=excluded_files,
            notes=notes_for_users,
            user_flatpaks=flatpaks_by_user,
            user_flatpak_remotes=user_remotes,
        )
        flatpak_snapshot = FlatpakSnapshot(
            role_name="flatpak",
            system_flatpaks=system_flatpaks,
            remotes=system_flatpak_remotes,
            notes=flatpak_notes,
        )
        snap_snapshot = SnapSnapshot(
            role_name="snap",
            system_snaps=system_snaps,
            notes=snap_notes,
        )
        return UsersCollection(
            users_snapshot=users_snapshot,
            flatpak_snapshot=flatpak_snapshot,
            snap_snapshot=snap_snapshot,
        )

File diff suppressed because it is too large Load diff

View file

@ -1,16 +1,137 @@
from __future__ import annotations from __future__ import annotations
import json import json
import os
import re import re
import shutil import shutil
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from typing import Any, Dict, Iterable, List, Optional, Tuple
from .cm import (
CMModule,
package_section_label,
resolve_catalog_conflicts,
role_order_key,
section_label_for_packages,
)
from .state import inventory_packages_from_state, roles_from_state
def _load_state(bundle_dir: str) -> Dict[str, Any]: class PuppetRole(CMModule):
with open(os.path.join(bundle_dir, "state.json"), "r", encoding="utf-8") as f: """Puppet-specific view of a renderer-neutral CMModule."""
return json.load(f)
def __init__(self, role_name: str) -> None:
    """Create an empty role whose module name is derived from ``role_name``.

    The module name comes from ``_puppet_name`` with ``"enroll_role"`` as the
    fallback.
    """
    module_name = _puppet_name(role_name, fallback="enroll_role")
    super().__init__(role_name=role_name, module_name=module_name)
def add_package_snapshot(self, snap: Dict[str, Any]) -> None:
    """Add the snapshot's ``package`` name to ``self.packages`` if non-blank."""
    raw_name = snap.get("package") or ""
    package_name = str(raw_name).strip()
    if not package_name:
        return
    self.packages.add(package_name)
def add_service_snapshot(self, snap: Dict[str, Any]) -> None:
    """Merge a service snapshot's packages and unit state into this role.

    Blank package names are ignored. When the snapshot names a unit, a
    service entry keyed by that unit is (re)written: ``ensure`` is
    ``"running"`` only for ``active_state == "active"`` and ``enable`` is
    true for the unit-file states ``enabled`` and ``enabled-runtime``.
    """
    cleaned = (str(pkg or "").strip() for pkg in snap.get("packages", []) or [])
    self.packages.update(name for name in cleaned if name)

    unit = str(snap.get("unit") or "").strip()
    if not unit:
        return

    is_active = snap.get("active_state") == "active"
    unit_file_state = str(snap.get("unit_file_state") or "")
    self.services[unit] = {
        "name": unit,
        "ensure": "running" if is_active else "stopped",
        "enable": unit_file_state in ("enabled", "enabled-runtime"),
    }
def add_users_snapshot(self, snap: Dict[str, Any]) -> None:
    """Register the users and groups described by a users snapshot.

    Non-dict entries and entries without a name are skipped. The primary
    group defaults to the user name, the home directory to ``/home/<name>``.
    A single note is added when the snapshot carries per-user Flatpak data,
    because that data is not rendered as Puppet resources.
    """
    for record in snap.get("users", []) or []:
        if not isinstance(record, dict):
            continue
        name = str(record.get("name") or "").strip()
        if not name:
            continue

        primary_group = str(record.get("primary_group") or name).strip()
        if primary_group:
            self.groups.add(primary_group)

        # Normalise supplementary groups: stringify, trim, drop blanks, dedupe.
        group_names = {
            str(group).strip() for group in (record.get("supplementary_groups") or [])
        }
        group_names.discard("")
        supplementary = sorted(group_names)
        self.groups.update(supplementary)

        self.users[name] = {
            "name": name,
            "uid": record.get("uid"),
            "gid": record.get("gid"),
            "primary_group": primary_group or None,
            "home": record.get("home") or f"/home/{name}",
            "shell": record.get("shell"),
            "gecos": record.get("gecos"),
            "supplementary_groups": supplementary,
        }

    has_user_flatpak_data = snap.get("user_flatpaks") or snap.get(
        "user_flatpak_remotes"
    )
    if has_user_flatpak_data:
        self.notes.append(
            "Per-user Flatpak resources were detected but are not yet rendered as native Puppet resources."
        )
def add_managed_content(
    self,
    snap: Dict[str, Any],
    *,
    bundle_dir: str,
    artifact_role: str,
    module_files_dir: Path,
) -> None:
    """Add the snapshot's managed directories, files and links to this role.

    Args:
        snap: Harvest snapshot providing the managed dirs/files/links.
        bundle_dir: Harvest bundle directory the file artifacts are read from.
        artifact_role: Role name under which the artifacts were harvested.
        module_files_dir: ``files/`` directory of the Puppet module that
            artifacts are copied into.

    Entries without a usable path (and files without ``src_rel``, links
    without ``target``) are skipped. A file whose artifact could not be
    copied is skipped and recorded in ``self.notes``.
    """
    for d in self.managed_dirs_from_snapshot(snap):
        path = str(d.get("path") or "").strip()
        # A blank path would yield an untitled directory resource; skip it,
        # matching the guards on the file and link loops below.
        if not path:
            continue
        self.add_managed_dir(
            path,
            owner=d.get("owner") or "root",
            group=d.get("group") or "root",
            mode=d.get("mode") or "0755",
            reason=d.get("reason") or "managed_dir",
        )

    for mf in self.managed_files_from_snapshot(snap):
        path = str(mf.get("path") or "").strip()
        src_rel = str(mf.get("src_rel") or "").strip()
        if not path or not src_rel:
            continue
        # Copy the harvested artifact into the module; a falsy result means
        # there is nothing to serve the file from.
        module_rel = _copy_artifact(
            bundle_dir, artifact_role, src_rel, module_files_dir
        )
        if not module_rel:
            self.notes.append(
                f"Skipped {path}: harvested artifact {artifact_role}/{src_rel} was not present."
            )
            continue
        self.add_managed_file(
            path,
            owner=mf.get("owner") or "root",
            group=mf.get("group") or "root",
            mode=mf.get("mode") or "0644",
            source=_source_uri(self.module_name, module_rel),
            reason=mf.get("reason") or "managed_file",
        )

    for ml in self.managed_links_from_snapshot(snap):
        path = str(ml.get("path") or "").strip()
        target = str(ml.get("target") or "").strip()
        if not path or not target:
            continue
        self.add_managed_link(
            path,
            target=target,
            reason=ml.get("reason") or "managed_link",
        )

    # Runs last, once all dirs, files and links have been added.
    self.remove_directory_resource_conflicts()
# https://help.puppet.com/core/current/Content/PuppetCore/lang_reserved_words.htm # https://help.puppet.com/core/current/Content/PuppetCore/lang_reserved_words.htm
@ -99,230 +220,18 @@ def _source_uri(module_name: str, module_rel: str) -> str:
return f"puppet:///modules/{module_name}/{module_rel}" return f"puppet:///modules/{module_name}/{module_rel}"
def _roles(state: Dict[str, Any]) -> Dict[str, Any]: def _add_flatpak_snap_notes(roles: Dict[str, Any], out: Dict[str, PuppetRole]) -> None:
roles = state.get("roles")
return roles if isinstance(roles, dict) else {}
def _inventory_packages(state: Dict[str, Any]) -> Dict[str, Any]:
inventory = state.get("inventory")
if not isinstance(inventory, dict):
return {}
packages = inventory.get("packages")
return packages if isinstance(packages, dict) else {}
def _package_section_label(
package_role: Dict[str, Any], inventory_packages: Dict[str, Any]
) -> str:
pkg = str(package_role.get("package") or "").strip()
inv = inventory_packages.get(pkg) or {}
candidates: List[str] = []
for value in (package_role.get("section"), inv.get("section"), inv.get("group")):
if isinstance(value, str) and value.strip():
candidates.append(value.strip())
for inst in inv.get("installations", []) or []:
if not isinstance(inst, dict):
continue
for key in ("section", "group"):
value = inst.get(key)
if isinstance(value, str) and value.strip():
candidates.append(value.strip())
for value in candidates:
if value.lower() not in {"(none)", "none", "unspecified"}:
return value
return "misc"
def _section_label_for_packages(
packages: List[str], inventory_packages: Dict[str, Any]
) -> str:
for pkg in packages or []:
label = _package_section_label({"package": pkg}, inventory_packages)
if label and label.lower() != "misc":
return label
return "misc"
class _PuppetRole:
def __init__(self, role_name: str) -> None:
self.role_name = role_name
self.module_name = _puppet_name(role_name, fallback="enroll_role")
self.packages: Set[str] = set()
self.groups: Set[str] = set()
self.users: Dict[str, Dict[str, Any]] = {}
self.dirs: Dict[str, Dict[str, Any]] = {}
self.files: Dict[str, Dict[str, Any]] = {}
self.links: Dict[str, Dict[str, Any]] = {}
self.services: Dict[str, Dict[str, Any]] = {}
self.notes: List[str] = []
def has_resources(self) -> bool:
return bool(
self.packages
or self.groups
or self.users
or self.dirs
or self.files
or self.links
or self.services
or self.notes
)
def _role_order_key(role: str) -> tuple[int, str]:
# Keep broadly similar ordering to generated Ansible playbooks: package/config
# scaffolding first, then services/users, then host-specific runtime state.
priority = {
"apt_config": 10,
"dnf_config": 11,
"etc_custom": 80,
"usr_local_custom": 81,
"extra_paths": 82,
"users": 90,
"sysctl": 95,
"firewall_runtime": 99,
}
return (priority.get(role, 50), role)
def _add_managed_content(
prole: _PuppetRole,
snap: Dict[str, Any],
*,
bundle_dir: str,
artifact_role: str,
module_files_dir: Path,
) -> None:
for d in snap.get("managed_dirs", []) or []:
if not isinstance(d, dict):
continue
path = str(d.get("path") or "").strip()
if not path:
continue
prole.dirs.setdefault(
path,
{
"owner": d.get("owner") or "root",
"group": d.get("group") or "root",
"mode": d.get("mode") or "0755",
"reason": d.get("reason") or "managed_dir",
},
)
for mf in snap.get("managed_files", []) or []:
if not isinstance(mf, dict):
continue
path = str(mf.get("path") or "").strip()
src_rel = str(mf.get("src_rel") or "").strip()
if not path or not src_rel:
continue
module_rel = _copy_artifact(
bundle_dir, artifact_role, src_rel, module_files_dir
)
if not module_rel:
prole.notes.append(
f"Skipped {path}: harvested artifact {artifact_role}/{src_rel} was not present."
)
continue
prole.files.setdefault(
path,
{
"owner": mf.get("owner") or "root",
"group": mf.get("group") or "root",
"mode": mf.get("mode") or "0644",
"source": _source_uri(prole.module_name, module_rel),
"reason": mf.get("reason") or "managed_file",
},
)
for ml in snap.get("managed_links", []) or []:
if not isinstance(ml, dict):
continue
path = str(ml.get("path") or "").strip()
target = str(ml.get("target") or "").strip()
if not path or not target:
continue
prole.links.setdefault(
path,
{
"target": target,
"reason": ml.get("reason") or "managed_link",
},
)
for path in set(prole.files) | set(prole.links):
prole.dirs.pop(path, None)
def _build_users_role(prole: _PuppetRole, snap: Dict[str, Any]) -> None:
for u in snap.get("users", []) or []:
if not isinstance(u, dict):
continue
name = str(u.get("name") or "").strip()
if not name:
continue
primary_group = str(u.get("primary_group") or name).strip()
if primary_group:
prole.groups.add(primary_group)
supplementary = sorted(
{
str(g).strip()
for g in (u.get("supplementary_groups") or [])
if str(g).strip()
}
)
prole.groups.update(supplementary)
prole.users[name] = {
"name": name,
"uid": u.get("uid"),
"gid": u.get("gid"),
"primary_group": primary_group or None,
"home": u.get("home") or f"/home/{name}",
"shell": u.get("shell"),
"gecos": u.get("gecos"),
"supplementary_groups": supplementary,
}
if snap.get("user_flatpaks") or snap.get("user_flatpak_remotes"):
prole.notes.append(
"Per-user Flatpak resources were detected but are not yet rendered as native Puppet resources."
)
def _build_service_role(prole: _PuppetRole, snap: Dict[str, Any]) -> None:
for pkg in snap.get("packages", []) or []:
pkg_s = str(pkg or "").strip()
if pkg_s:
prole.packages.add(pkg_s)
unit = str(snap.get("unit") or "").strip()
if unit:
unit_file_state = str(snap.get("unit_file_state") or "")
prole.services[unit] = {
"name": unit,
"ensure": "running" if snap.get("active_state") == "active" else "stopped",
"enable": unit_file_state in ("enabled", "enabled-runtime"),
}
def _build_package_role(prole: _PuppetRole, snap: Dict[str, Any]) -> None:
pkg = str(snap.get("package") or "").strip()
if pkg:
prole.packages.add(pkg)
def _add_flatpak_snap_notes(roles: Dict[str, Any], out: Dict[str, _PuppetRole]) -> None:
flatpak = roles.get("flatpak") or {} flatpak = roles.get("flatpak") or {}
if isinstance(flatpak, dict) and ( if isinstance(flatpak, dict) and (
flatpak.get("system_flatpaks") or flatpak.get("remotes") flatpak.get("system_flatpaks") or flatpak.get("remotes")
): ):
prole = out.setdefault("flatpak", _PuppetRole("flatpak")) prole = out.setdefault("flatpak", PuppetRole("flatpak"))
prole.notes.append( prole.notes.append(
"Flatpak resources were detected but are not yet rendered as native Puppet resources." "Flatpak resources were detected but are not yet rendered as native Puppet resources."
) )
snap = roles.get("snap") or {} snap = roles.get("snap") or {}
if isinstance(snap, dict) and snap.get("system_snaps"): if isinstance(snap, dict) and snap.get("system_snaps"):
prole = out.setdefault("snap", _PuppetRole("snap")) prole = out.setdefault("snap", PuppetRole("snap"))
prole.notes.append( prole.notes.append(
"Snap resources were detected but are not yet rendered as native Puppet resources." "Snap resources were detected but are not yet rendered as native Puppet resources."
) )
@ -335,15 +244,15 @@ def _collect_puppet_roles(
*, *,
fqdn: Optional[str] = None, fqdn: Optional[str] = None,
no_common_roles: bool = False, no_common_roles: bool = False,
) -> List[_PuppetRole]: ) -> List[PuppetRole]:
roles = _roles(state) roles = roles_from_state(state)
inventory_packages = _inventory_packages(state) inventory_packages = inventory_packages_from_state(state)
use_common_modules = not fqdn and not no_common_roles use_common_modules = not fqdn and not no_common_roles
out: Dict[str, _PuppetRole] = {} out: Dict[str, PuppetRole] = {}
def ensure_role(role_name: str) -> _PuppetRole: def ensure_role(role_name: str) -> PuppetRole:
role_name = _puppet_name(role_name, fallback="enroll_role") role_name = _puppet_name(role_name, fallback="enroll_role")
return out.setdefault(role_name, _PuppetRole(role_name)) return out.setdefault(role_name, PuppetRole(role_name))
for key in ( for key in (
"apt_config", "apt_config",
@ -361,8 +270,7 @@ def _collect_puppet_roles(
) )
prole = ensure_role(role_name) prole = ensure_role(role_name)
module_files_dir = modules_dir / prole.module_name / "files" module_files_dir = modules_dir / prole.module_name / "files"
_add_managed_content( prole.add_managed_content(
prole,
snap, snap,
bundle_dir=bundle_dir, bundle_dir=bundle_dir,
artifact_role=str(snap.get("role_name") or key), artifact_role=str(snap.get("role_name") or key),
@ -375,9 +283,8 @@ def _collect_puppet_roles(
str(users_snap.get("role_name") or "users"), fallback="enroll_role" str(users_snap.get("role_name") or "users"), fallback="enroll_role"
) )
prole = ensure_role(role_name) prole = ensure_role(role_name)
_build_users_role(prole, users_snap) prole.add_users_snapshot(users_snap)
_add_managed_content( prole.add_managed_content(
prole,
users_snap, users_snap,
bundle_dir=bundle_dir, bundle_dir=bundle_dir,
artifact_role=str(users_snap.get("role_name") or "users"), artifact_role=str(users_snap.get("role_name") or "users"),
@ -393,7 +300,7 @@ def _collect_puppet_roles(
) )
if use_common_modules: if use_common_modules:
role_name = _puppet_name( role_name = _puppet_name(
_section_label_for_packages( section_label_for_packages(
[ [
str(p).strip() str(p).strip()
for p in (svc.get("packages") or []) for p in (svc.get("packages") or [])
@ -406,9 +313,8 @@ def _collect_puppet_roles(
else: else:
role_name = original_role_name role_name = original_role_name
prole = ensure_role(role_name) prole = ensure_role(role_name)
_build_service_role(prole, svc) prole.add_service_snapshot(svc)
_add_managed_content( prole.add_managed_content(
prole,
svc, svc,
bundle_dir=bundle_dir, bundle_dir=bundle_dir,
artifact_role=str(svc.get("role_name") or original_role_name), artifact_role=str(svc.get("role_name") or original_role_name),
@ -424,15 +330,14 @@ def _collect_puppet_roles(
) )
if use_common_modules: if use_common_modules:
role_name = _puppet_name( role_name = _puppet_name(
_package_section_label(pkg, inventory_packages), package_section_label(pkg, inventory_packages),
fallback="package_group", fallback="package_group",
) )
else: else:
role_name = original_role_name role_name = original_role_name
prole = ensure_role(role_name) prole = ensure_role(role_name)
_build_package_role(prole, pkg) prole.add_package_snapshot(pkg)
_add_managed_content( prole.add_managed_content(
prole,
pkg, pkg,
bundle_dir=bundle_dir, bundle_dir=bundle_dir,
artifact_role=str(pkg.get("role_name") or original_role_name), artifact_role=str(pkg.get("role_name") or original_role_name),
@ -459,71 +364,12 @@ def _collect_puppet_roles(
_add_flatpak_snap_notes(roles, out) _add_flatpak_snap_notes(roles, out)
puppet_roles = sorted(out.values(), key=lambda r: _role_order_key(r.role_name)) puppet_roles = sorted(out.values(), key=lambda r: role_order_key(r.role_name))
_dedupe_puppet_roles(puppet_roles) resolve_catalog_conflicts(puppet_roles)
return [r for r in puppet_roles if r.has_resources()] return [r for r in puppet_roles if r.has_resources()]
def _dedupe_puppet_roles(puppet_roles: List[_PuppetRole]) -> None: def _render_role_class(prole: PuppetRole) -> str:
"""Remove duplicate catalog resources across generated Puppet classes.
Ansible can repeat the same directory task in multiple roles. Puppet cannot:
a resource title such as File['/etc/default'] may appear only once in the
compiled catalog. Keep the first declaration in manifest order and drop
later duplicates.
"""
concrete_file_paths: Set[str] = set()
for prole in puppet_roles:
concrete_file_paths.update(prole.files)
concrete_file_paths.update(prole.links)
seen_packages: Set[str] = set()
seen_groups: Set[str] = set()
seen_users: Set[str] = set()
seen_dirs: Set[str] = set()
seen_files: Set[str] = set()
seen_links: Set[str] = set()
seen_services: Set[str] = set()
for prole in puppet_roles:
prole.packages = {p for p in prole.packages if p not in seen_packages}
seen_packages.update(prole.packages)
prole.groups = {g for g in prole.groups if g not in seen_groups}
seen_groups.update(prole.groups)
prole.users = {k: v for k, v in prole.users.items() if k not in seen_users}
seen_users.update(prole.users)
prole.dirs = {
k: v
for k, v in prole.dirs.items()
if k not in seen_dirs and k not in concrete_file_paths
}
seen_dirs.update(prole.dirs)
prole.files = {
k: v
for k, v in prole.files.items()
if k not in seen_files and k not in seen_links
}
seen_files.update(prole.files)
prole.links = {
k: v
for k, v in prole.links.items()
if k not in seen_links and k not in seen_files
}
seen_links.update(prole.links)
prole.services = {
k: v for k, v in prole.services.items() if k not in seen_services
}
seen_services.update(prole.services)
def _render_role_class(prole: _PuppetRole) -> str:
has_sysctl_conf = "/etc/sysctl.d/99-enroll.conf" in prole.files has_sysctl_conf = "/etc/sysctl.d/99-enroll.conf" in prole.files
if has_sysctl_conf: if has_sysctl_conf:
lines: List[str] = [ lines: List[str] = [
@ -643,7 +489,7 @@ def _render_role_class(prole: _PuppetRole) -> str:
return "\n".join(lines) return "\n".join(lines)
def _render_site_pp(puppet_roles: List[_PuppetRole], fqdn: Optional[str]) -> str: def _render_site_pp(puppet_roles: List[PuppetRole], fqdn: Optional[str]) -> str:
node_name = _pp_quote(fqdn) if fqdn else "default" node_name = _pp_quote(fqdn) if fqdn else "default"
if not puppet_roles: if not puppet_roles:
return f"node {node_name} {{\n # No Puppet classes were generated from this harvest.\n}}\n" return f"node {node_name} {{\n # No Puppet classes were generated from this harvest.\n}}\n"
@ -671,7 +517,7 @@ def _write_metadata(module_dir: Path, module_name: str) -> None:
) )
def _render_readme(state: Dict[str, Any], puppet_roles: List[_PuppetRole]) -> str: def _render_readme(state: Dict[str, Any], puppet_roles: List[PuppetRole]) -> str:
host = state.get("host", {}) if isinstance(state.get("host"), dict) else {} host = state.get("host", {}) if isinstance(state.get("host"), dict) else {}
hostname = host.get("hostname") or "unknown" hostname = host.get("hostname") or "unknown"
role_lines = ( role_lines = (
@ -726,7 +572,7 @@ sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/man
## Current limitations ## Current limitations
- Flatpak, Snap, and live firewall runtime snapshots are listed as notes when present rather than rendered as Puppet resources. - Flatpak, Snap, and live firewall runtime snapshots are listed as notes when present rather than rendered as Puppet resources.
- JinjaTurtle templating is Ansible-oriented and is not applied to Puppet output. - JinjaTurtle templating is currently Ansible-oriented and is not applied to Puppet output.
- Review generated resources before applying them broadly across unlike hosts. - Review generated resources before applying them broadly across unlike hosts.
## Notes ## Notes
@ -735,45 +581,75 @@ sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/man
""" """
def manifest_puppet_from_bundle_dir( class PuppetManifestRenderer:
"""Render Puppet modules and site manifest from a harvest bundle."""
def __init__(
    self,
    bundle_dir: str,
    out_dir: str,
    *,
    fqdn: Optional[str] = None,
    no_common_roles: bool = False,
) -> None:
    """Remember the rendering inputs; nothing is read or written here.

    Args:
        bundle_dir: Harvest bundle directory to render from.
        out_dir: Directory the Puppet output is written to.
        fqdn: Optional host name stored for use when rendering.
        no_common_roles: Flag stored for use when rendering.
    """
    self.bundle_dir, self.out_dir = bundle_dir, out_dir
    self.fqdn, self.no_common_roles = fqdn, no_common_roles
def render(self) -> None:
    """Regenerate the Puppet output tree from the harvest bundle.

    The bundle state is loaded first; then any existing output directory
    is removed and rebuilt with ``manifests/`` and ``modules/``. Each
    collected role gets its own module (``manifests/init.pp``, an
    ``files/`` directory and metadata), followed by ``manifests/site.pp``
    and a top-level ``README.md``.
    """
    state = PuppetRole.load_state(self.bundle_dir)

    out_root = Path(self.out_dir)
    if out_root.exists():
        # Start from a clean tree so stale modules never linger.
        shutil.rmtree(out_root)

    manifests_dir = out_root / "manifests"
    modules_dir = out_root / "modules"
    for top_level in (manifests_dir, modules_dir):
        top_level.mkdir(parents=True, exist_ok=True)

    puppet_roles = _collect_puppet_roles(
        state,
        self.bundle_dir,
        modules_dir,
        fqdn=self.fqdn,
        no_common_roles=self.no_common_roles,
    )

    for prole in puppet_roles:
        module_dir = modules_dir / prole.module_name
        for subdir in ("manifests", "files"):
            (module_dir / subdir).mkdir(parents=True, exist_ok=True)
        init_pp = module_dir / "manifests" / "init.pp"
        init_pp.write_text(_render_role_class(prole), encoding="utf-8")
        _write_metadata(module_dir, prole.module_name)

    site_pp = manifests_dir / "site.pp"
    site_pp.write_text(_render_site_pp(puppet_roles, self.fqdn), encoding="utf-8")

    readme = out_root / "README.md"
    readme.write_text(_render_readme(state, puppet_roles), encoding="utf-8")
def manifest_from_bundle_dir(
bundle_dir: str, bundle_dir: str,
out_dir: str, out_dir: str,
*, *,
fqdn: Optional[str] = None, fqdn: Optional[str] = None,
no_common_roles: bool = False, no_common_roles: bool = False,
) -> None: ) -> None:
"""Render Puppet modules/site.pp from a harvest bundle.""" PuppetManifestRenderer(
state = _load_state(bundle_dir)
out = Path(out_dir)
if out.exists():
shutil.rmtree(out)
manifests_dir = out / "manifests"
modules_dir = out / "modules"
manifests_dir.mkdir(parents=True, exist_ok=True)
modules_dir.mkdir(parents=True, exist_ok=True)
puppet_roles = _collect_puppet_roles(
state,
bundle_dir, bundle_dir,
modules_dir, out_dir,
fqdn=fqdn, fqdn=fqdn,
no_common_roles=no_common_roles, no_common_roles=no_common_roles,
) ).render()
for prole in puppet_roles:
module_dir = modules_dir / prole.module_name
module_manifests = module_dir / "manifests"
module_files = module_dir / "files"
module_manifests.mkdir(parents=True, exist_ok=True)
module_files.mkdir(parents=True, exist_ok=True)
(module_manifests / "init.pp").write_text(
_render_role_class(prole), encoding="utf-8"
)
_write_metadata(module_dir, prole.module_name)
(manifests_dir / "site.pp").write_text(
_render_site_pp(puppet_roles, fqdn), encoding="utf-8"
)
(out / "README.md").write_text(
_render_readme(state, puppet_roles), encoding="utf-8"
)

53
enroll/state.py Normal file
View file

@ -0,0 +1,53 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Mapping, Union
# A harvest bundle directory, given either as a plain string or as a Path.
BundlePath = Union[str, Path]
# The parsed contents of a bundle's state.json.
State = Dict[str, Any]
def state_path(bundle_dir: BundlePath) -> Path:
    """Locate ``state.json`` inside the harvest bundle at *bundle_dir*.

    Only the path is built; nothing is read from or checked on disk.
    """
    bundle_root = Path(bundle_dir)
    return bundle_root.joinpath("state.json")
def load_state(bundle_dir: BundlePath) -> State:
    """Read and parse ``state.json`` from the harvest bundle at *bundle_dir*.

    The file is decoded as UTF-8 and the parsed JSON value is returned
    as-is. Errors from opening the file or from the JSON parser propagate
    to the caller.
    """
    json_file = state_path(bundle_dir)
    with json_file.open("r", encoding="utf-8") as handle:
        return json.load(handle)
def write_state(
    bundle_dir: BundlePath,
    state: Mapping[str, Any],
    *,
    indent: int = 2,
    sort_keys: bool = True,
) -> Path:
    """Write *state* as ``state.json`` in a harvest bundle and return its path.

    Args:
        bundle_dir: Bundle directory; it is not created here, so it must
            already exist.
        state: Top-level state mapping. Any ``Mapping`` is accepted; it is
            copied into a plain ``dict`` because ``json`` only serialises
            ``dict`` instances.
        indent: Indentation passed through to ``json.dumps``.
        sort_keys: Whether keys are emitted in sorted order.

    Returns:
        The path of the written ``state.json``.

    Raises:
        TypeError: If *state* contains a value JSON cannot represent. The
            existing ``state.json`` is left untouched in that case.
        OSError: If the file cannot be opened or written.
    """
    path = state_path(bundle_dir)
    # Serialise before opening the file: opening in "w" mode truncates it
    # immediately, so a serialisation error raised mid-dump would otherwise
    # destroy the previous state.json.
    payload = json.dumps(dict(state), indent=indent, sort_keys=sort_keys)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
    return path
def roles_from_state(state: Mapping[str, Any]) -> Dict[str, Any]:
    """Extract a shallow copy of ``state["roles"]``.

    An empty dict is returned when the key is missing or its value is not
    a dict.
    """
    candidate = state.get("roles")
    if not isinstance(candidate, dict):
        return {}
    return dict(candidate)
def inventory_packages_from_state(state: Mapping[str, Any]) -> Dict[str, Any]:
    """Extract a shallow copy of ``state["inventory"]["packages"]``.

    An empty dict is returned when either level is missing or is not a
    dict.
    """
    inventory = state.get("inventory")
    packages = inventory.get("packages") if isinstance(inventory, dict) else None
    if isinstance(packages, dict):
        return dict(packages)
    return {}

View file

@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple
import jsonschema import jsonschema
from .diff import BundleRef, _bundle_from_input from .diff import BundleRef, _bundle_from_input
from .state import load_state
@dataclass @dataclass
@ -153,7 +154,7 @@ def validate_harvest(
) )
try: try:
state = json.loads(state_path.read_text(encoding="utf-8")) state = load_state(bundle.dir)
except Exception as e: # noqa: BLE001 except Exception as e: # noqa: BLE001
return ValidationResult( return ValidationResult(
errors=[f"failed to parse state.json: {e!r}"], warnings=[] errors=[f"failed to parse state.json: {e!r}"], warnings=[]

40
tests/test_cm.py Normal file
View file

@ -0,0 +1,40 @@
from __future__ import annotations
from enroll.cm import CMModule, resolve_catalog_conflicts
def test_resolve_catalog_conflicts_dedupes_before_rendering():
    """The first module keeps a contested resource; the later one drops it and notes why."""
    keeper = CMModule(role_name="admin", module_name="admin")
    keeper.packages.add("curl")
    keeper.dirs["/etc/default"] = {"owner": "root"}
    keeper.files["/etc/foo.conf"] = {"owner": "root"}

    loser = CMModule(role_name="misc", module_name="misc")
    loser.packages.add("curl")
    # /etc/foo.conf is deliberately declared both as a directory and a file.
    for dir_path in ("/etc/default", "/etc/foo.conf"):
        loser.dirs[dir_path] = {"owner": "root"}
    loser.files["/etc/foo.conf"] = {"owner": "root"}

    resolve_catalog_conflicts([keeper, loser])

    # The first declaration survives untouched.
    assert keeper.packages == {"curl"}
    assert "/etc/default" in keeper.dirs
    assert "/etc/foo.conf" in keeper.files

    # Every duplicate is stripped from the later module.
    assert loser.packages == set()
    assert loser.dirs == {}
    assert loser.files == {}

    # Each removal leaves an explanatory note behind.
    expected_fragments = (
        "duplicate Package[curl]",
        "duplicate File[/etc/default]",
        "a file or link with the same path",
    )
    for fragment in expected_fragments:
        assert any(fragment in note for note in loser.notes)
def test_cm_module_uses_shared_state_io(tmp_path):
    """CMModule's state helpers write state.json and read the same data back."""
    payload = {"roles": {"packages": []}}
    expected_path = tmp_path / "state.json"

    written = CMModule.write_state(tmp_path, payload)

    assert written == expected_path
    assert CMModule.state_path(tmp_path) == written
    # Both the public loader and its underscore-prefixed name must round-trip.
    assert CMModule.load_state(tmp_path) == payload
    assert CMModule._load_state(tmp_path) == payload

View file

@ -0,0 +1,44 @@
from __future__ import annotations
from enroll.harvest import (
FirewallRuntimeSnapshot,
HarvestContext,
IgnorePolicy,
PathFilter,
RuntimeStateCollector,
SysctlSnapshot,
)
class _Backend:
    """Minimal stand-in passed as the ``backend`` of the test HarvestContext."""

    # The only attribute this stub defines.
    name = "dpkg"
def _context(tmp_path):
    """Build a HarvestContext rooted at *tmp_path* with empty package and ownership data."""
    context_fields = {
        "bundle_dir": str(tmp_path),
        "policy": IgnorePolicy(),
        "path_filter": PathFilter(include=(), exclude=()),
        "platform": {},
        "backend": _Backend(),
        "installed_pkgs": {},
        "installed_names": set(),
        "owned_etc": set(),
        "etc_owner_map": {},
        "topdir_to_pkgs": {},
        "pkg_to_etc_paths": {},
        "captured_global": set(),
    }
    return HarvestContext(**context_fields)
def test_runtime_state_collector_preserves_non_root_skip_schema(monkeypatch, tmp_path):
    """With a non-root euid the collector still returns typed snapshots carrying a skip note."""
    # Pretend to be an unprivileged user.
    monkeypatch.setattr("enroll.harvest.os.geteuid", lambda: 1000)

    collected = RuntimeStateCollector(_context(tmp_path)).collect()
    firewall = collected.firewall_runtime_snapshot
    sysctl = collected.sysctl_snapshot

    assert isinstance(firewall, FirewallRuntimeSnapshot)
    assert isinstance(sysctl, SysctlSnapshot)
    assert firewall.role_name == "firewall_runtime"
    assert sysctl.role_name == "sysctl"
    for snapshot in (firewall, sysctl):
        assert "not running as root" in snapshot.notes[0]

View file

@ -2,6 +2,7 @@ import json
from pathlib import Path from pathlib import Path
import enroll.manifest as manifest_mod import enroll.manifest as manifest_mod
from enroll import ansible as ansible_mod
from enroll.jinjaturtle import JinjifyResult from enroll.jinjaturtle import JinjifyResult
@ -106,7 +107,7 @@ def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw(
# Pretend jinjaturtle exists. # Pretend jinjaturtle exists.
monkeypatch.setattr( monkeypatch.setattr(
manifest_mod, "find_jinjaturtle_cmd", lambda: "/usr/bin/jinjaturtle" ansible_mod, "find_jinjaturtle_cmd", lambda: "/usr/bin/jinjaturtle"
) )
# Stub jinjaturtle output. # Stub jinjaturtle output.
@ -119,7 +120,7 @@ def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw(
vars_text="foo_key: 1\n", vars_text="foo_key: 1\n",
) )
monkeypatch.setattr(manifest_mod, "run_jinjaturtle", fake_run_jinjaturtle) monkeypatch.setattr(ansible_mod, "run_jinjaturtle", fake_run_jinjaturtle)
manifest_mod.manifest(str(bundle), str(out), jinjaturtle="on") manifest_mod.manifest(str(bundle), str(out), jinjaturtle="on")

View file

@ -7,6 +7,7 @@ import tarfile
import pytest import pytest
import enroll.manifest as manifest import enroll.manifest as manifest
from enroll import ansible as ansible_mod
def _minimal_package_state(packages): def _minimal_package_state(packages):
@ -824,7 +825,7 @@ def test_copy2_replace_overwrites_readonly_destination(tmp_path: Path):
import os import os
import stat import stat
from enroll.manifest import _copy2_replace from enroll.ansible import _copy2_replace
src = tmp_path / "src" src = tmp_path / "src"
dst = tmp_path / "dst" dst = tmp_path / "dst"
@ -935,7 +936,7 @@ def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path):
def test_render_install_packages_tasks_contains_dnf_branch(): def test_render_install_packages_tasks_contains_dnf_branch():
from enroll.manifest import _render_install_packages_tasks from enroll.ansible import _render_install_packages_tasks
txt = _render_install_packages_tasks("role", "role") txt = _render_install_packages_tasks("role", "role")
assert "ansible.builtin.apt" in txt assert "ansible.builtin.apt" in txt
@ -1073,9 +1074,9 @@ def test_manifest_orders_cron_and_logrotate_at_playbook_tail(tmp_path: Path):
def test_yaml_helpers_fallback_when_yaml_unavailable(monkeypatch): def test_yaml_helpers_fallback_when_yaml_unavailable(monkeypatch):
monkeypatch.setattr(manifest, "_try_yaml", lambda: None) monkeypatch.setattr(ansible_mod, "_try_yaml", lambda: None)
assert manifest._yaml_load_mapping("foo: 1\n") == {} assert ansible_mod._yaml_load_mapping("foo: 1\n") == {}
out = manifest._yaml_dump_mapping({"b": 2, "a": 1}) out = ansible_mod._yaml_dump_mapping({"b": 2, "a": 1})
# Best-effort fallback is key: repr(value) # Best-effort fallback is key: repr(value)
assert out.splitlines()[0].startswith("a: ") assert out.splitlines()[0].startswith("a: ")
assert out.endswith("\n") assert out.endswith("\n")
@ -1090,7 +1091,7 @@ def test_copy2_replace_makes_readonly_sources_user_writable(
# Make source read-only; copy2 preserves mode, so tmp will be read-only too. # Make source read-only; copy2 preserves mode, so tmp will be read-only too.
os.chmod(src, 0o444) os.chmod(src, 0o444)
manifest._copy2_replace(str(src), str(dst)) ansible_mod._copy2_replace(str(src), str(dst))
st = os.stat(dst, follow_symlinks=False) st = os.stat(dst, follow_symlinks=False)
assert stat.S_IMODE(st.st_mode) & stat.S_IWUSR assert stat.S_IMODE(st.st_mode) & stat.S_IWUSR
@ -1208,13 +1209,13 @@ def test_manifest_applies_jinjaturtle_to_jinjifyable_managed_file(
__import__("json").dumps(state), encoding="utf-8" __import__("json").dumps(state), encoding="utf-8"
) )
monkeypatch.setattr(manifest, "find_jinjaturtle_cmd", lambda: "jinjaturtle") monkeypatch.setattr(ansible_mod, "find_jinjaturtle_cmd", lambda: "jinjaturtle")
class _Res: class _Res:
template_text = "key={{ foo }}\n" template_text = "key={{ foo }}\n"
vars_text = "foo: 123\n" vars_text = "foo: 123\n"
monkeypatch.setattr(manifest, "run_jinjaturtle", lambda *a, **k: _Res()) monkeypatch.setattr(ansible_mod, "run_jinjaturtle", lambda *a, **k: _Res())
out_dir = tmp_path / "out" out_dir = tmp_path / "out"
manifest.manifest(str(bundle), str(out_dir), jinjaturtle="on") manifest.manifest(str(bundle), str(out_dir), jinjaturtle="on")
@ -1330,7 +1331,7 @@ def test_manifest_writes_firewall_runtime_role(tmp_path: Path):
def test_try_yaml_with_yaml_installed(): def test_try_yaml_with_yaml_installed():
result = manifest._try_yaml() result = ansible_mod._try_yaml()
# PyYAML should be installed for tests # PyYAML should be installed for tests
if result is None: if result is None:
pytest.skip("PyYAML not installed") pytest.skip("PyYAML not installed")
@ -1347,55 +1348,55 @@ list:
- item1 - item1
- item2 - item2
""" """
result = manifest._yaml_load_mapping(text) result = ansible_mod._yaml_load_mapping(text)
assert result["key1"] == "value1" assert result["key1"] == "value1"
assert result["key2"]["nested"] == "value" assert result["key2"]["nested"] == "value"
assert result["list"] == ["item1", "item2"] assert result["list"] == ["item1", "item2"]
def test_yaml_load_mapping_empty(): def test_yaml_load_mapping_empty():
result = manifest._yaml_load_mapping("") result = ansible_mod._yaml_load_mapping("")
assert result == {} assert result == {}
def test_yaml_load_mapping_invalid(): def test_yaml_load_mapping_invalid():
result = manifest._yaml_load_mapping("invalid: yaml: :") result = ansible_mod._yaml_load_mapping("invalid: yaml: :")
assert result == {} assert result == {}
def test_yaml_load_mapping_not_dict(): def test_yaml_load_mapping_not_dict():
result = manifest._yaml_load_mapping("- item1\n- item2") result = ansible_mod._yaml_load_mapping("- item1\n- item2")
assert result == {} assert result == {}
def test_yaml_load_mapping_none(): def test_yaml_load_mapping_none():
result = manifest._yaml_load_mapping("~") result = ansible_mod._yaml_load_mapping("~")
assert result == {} assert result == {}
def test_yaml_dump_mapping_with_yaml(tmp_path: Path): def test_yaml_dump_mapping_with_yaml(tmp_path: Path):
obj = {"key1": "value1", "key2": 123} obj = {"key1": "value1", "key2": 123}
result = manifest._yaml_dump_mapping(obj) result = ansible_mod._yaml_dump_mapping(obj)
assert "key1: value1" in result assert "key1: value1" in result
assert "key2:" in result assert "key2:" in result
def test_yaml_dump_mapping_empty(): def test_yaml_dump_mapping_empty():
result = manifest._yaml_dump_mapping({}) result = ansible_mod._yaml_dump_mapping({})
# Empty dict produces '{}' # Empty dict produces '{}'
assert result.strip() == "{}" assert result.strip() == "{}"
def test_yaml_dump_mapping_with_nested(tmp_path: Path): def test_yaml_dump_mapping_with_nested(tmp_path: Path):
obj = {"key1": {"nested": "value"}} obj = {"key1": {"nested": "value"}}
result = manifest._yaml_dump_mapping(obj) result = ansible_mod._yaml_dump_mapping(obj)
assert "nested:" in result assert "nested:" in result
def test_merge_mappings_overwrite_simple(): def test_merge_mappings_overwrite_simple():
existing = {"key1": "old", "key2": "keep"} existing = {"key1": "old", "key2": "keep"}
incoming = {"key1": "new", "key3": "added"} incoming = {"key1": "new", "key3": "added"}
result = manifest._merge_mappings_overwrite(existing, incoming) result = ansible_mod._merge_mappings_overwrite(existing, incoming)
assert result["key1"] == "new" assert result["key1"] == "new"
assert result["key2"] == "keep" assert result["key2"] == "keep"
assert result["key3"] == "added" assert result["key3"] == "added"
@ -1404,16 +1405,16 @@ def test_merge_mappings_overwrite_simple():
def test_merge_mappings_overwrite_nested(): def test_merge_mappings_overwrite_nested():
existing = {"key1": {"a": 1}} existing = {"key1": {"a": 1}}
incoming = {"key1": {"b": 2}} incoming = {"key1": {"b": 2}}
result = manifest._merge_mappings_overwrite(existing, incoming) result = ansible_mod._merge_mappings_overwrite(existing, incoming)
# Nested dicts are replaced, not merged # Nested dicts are replaced, not merged
assert result["key1"] == {"b": 2} assert result["key1"] == {"b": 2}
def test_merge_mappings_overwrite_empty(): def test_merge_mappings_overwrite_empty():
result = manifest._merge_mappings_overwrite({}, {"key": "value"}) result = ansible_mod._merge_mappings_overwrite({}, {"key": "value"})
assert result == {"key": "value"} assert result == {"key": "value"}
result = manifest._merge_mappings_overwrite({"key": "value"}, {}) result = ansible_mod._merge_mappings_overwrite({"key": "value"}, {})
assert result == {"key": "value"} assert result == {"key": "value"}
@ -1422,7 +1423,7 @@ def test_copy2_replace(tmp_path: Path):
src.write_text("content", encoding="utf-8") src.write_text("content", encoding="utf-8")
dst = tmp_path / "dst" / "subdir" / "dst.txt" dst = tmp_path / "dst" / "subdir" / "dst.txt"
manifest._copy2_replace(str(src), str(dst)) ansible_mod._copy2_replace(str(src), str(dst))
assert dst.exists() assert dst.exists()
assert dst.read_text(encoding="utf-8") == "content" assert dst.read_text(encoding="utf-8") == "content"
@ -1434,7 +1435,7 @@ def test_copy2_replace_preserves_metadata(tmp_path: Path):
os.chmod(str(src), 0o644) os.chmod(str(src), 0o644)
dst = tmp_path / "dst.txt" dst = tmp_path / "dst.txt"
manifest._copy2_replace(str(src), str(dst)) ansible_mod._copy2_replace(str(src), str(dst))
assert dst.exists() assert dst.exists()
st = dst.stat() st = dst.stat()
@ -1449,55 +1450,30 @@ def test_copy2_replace_atomic(tmp_path: Path):
# Write initial content # Write initial content
dst.write_text("old", encoding="utf-8") dst.write_text("old", encoding="utf-8")
manifest._copy2_replace(str(src), str(dst)) ansible_mod._copy2_replace(str(src), str(dst))
assert dst.read_text(encoding="utf-8") == "content" assert dst.read_text(encoding="utf-8") == "content"
def test_render_firewall_runtime_tasks_empty(): def test_render_firewall_runtime_tasks_empty():
state = {"roles": {}} result = ansible_mod._render_firewall_runtime_tasks("firewall_runtime")
result = manifest._render_firewall_runtime_tasks(state)
# Function always returns at least a basic playbook structure # Function always returns at least a basic playbook structure
assert isinstance(result, str) assert isinstance(result, str)
assert len(result) > 0 assert len(result) > 0
def test_render_firewall_runtime_tasks_with_iptables(): def test_render_firewall_runtime_tasks_with_iptables():
state = { result = ansible_mod._render_firewall_runtime_tasks("firewall_runtime")
"roles": {
"firewall_runtime": {
"role_name": "firewall_runtime",
"iptables_v4_save": "artifacts/firewall_runtime/iptables.save",
}
}
}
result = manifest._render_firewall_runtime_tasks(state)
assert len(result) >= 1 assert len(result) >= 1
def test_render_firewall_runtime_tasks_with_ipset(): def test_render_firewall_runtime_tasks_with_ipset():
state = { result = ansible_mod._render_firewall_runtime_tasks("firewall_runtime")
"roles": {
"firewall_runtime": {
"role_name": "firewall_runtime",
"ipset_save": "artifacts/firewall_runtime/ipset.save",
}
}
}
result = manifest._render_firewall_runtime_tasks(state)
assert len(result) >= 1 assert len(result) >= 1
def test_render_firewall_runtime_tasks_with_ipv6(): def test_render_firewall_runtime_tasks_with_ipv6():
state = { result = ansible_mod._render_firewall_runtime_tasks("firewall_runtime")
"roles": {
"firewall_runtime": {
"role_name": "firewall_runtime",
"iptables_v6_save": "artifacts/firewall_runtime/ip6tables.save",
}
}
}
result = manifest._render_firewall_runtime_tasks(state)
assert len(result) >= 1 assert len(result) >= 1
@ -1708,6 +1684,93 @@ def test_users_role_without_portable_apps_omits_community_general_tasks(tmp_path
assert "collections:" not in users_meta assert "collections:" not in users_meta
def test_users_role_only_creates_ssh_dir_when_managed_ssh_files_exist(tmp_path):
    """Only a user with a managed file under ``.ssh`` gets a ``users_ssh_dirs`` entry."""
    bundle = tmp_path / "bundle"
    out = tmp_path / "out"
    artifacts = bundle / "artifacts" / "users"

    # alice has an SSH artifact, bob only a dotfile, carol nothing at all.
    (artifacts / "alice" / ".ssh").mkdir(parents=True, exist_ok=True)
    (artifacts / "bob").mkdir(parents=True, exist_ok=True)
    harvested = (
        (artifacts / "alice" / ".ssh" / "authorized_keys", "ssh-ed25519 example alice\n"),
        (artifacts / "bob" / ".bashrc", "alias ll='ls -l'\n"),
    )
    for artifact, content in harvested:
        artifact.write_text(content, encoding="utf-8")

    users = [
        {
            "name": name,
            "uid": uid,
            "home": f"/home/{name}",
            "primary_group": name,
            "supplementary_groups": [],
        }
        for uid, name in enumerate(("alice", "bob", "carol"), start=1000)
    ]
    managed_files = [
        {
            "path": "/home/alice/.ssh/authorized_keys",
            "src_rel": "alice/.ssh/authorized_keys",
            "mode": "0644",
            "reason": "authorized_keys",
        },
        {
            "path": "/home/bob/.bashrc",
            "src_rel": "bob/.bashrc",
            "mode": "0644",
            "reason": "dangerous_user_dotfile",
        },
    ]
    users_role = {
        "role_name": "users",
        "users": users,
        "managed_files": managed_files,
        "excluded": [],
        "notes": [],
    }
    state = {
        "roles": {
            "users": users_role,
            "services": [],
            "packages": [],
        },
    }

    bundle.mkdir(parents=True, exist_ok=True)
    (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8")

    manifest.manifest(str(bundle), str(out))

    users_role_dir = out / "roles" / "users"
    users_defaults = ansible_mod._yaml_load_mapping(
        (users_role_dir / "defaults" / "main.yml").read_text(encoding="utf-8")
    )
    users_tasks = (users_role_dir / "tasks" / "main.yml").read_text(encoding="utf-8")

    # Only alice has a managed file under .ssh, so only her directory is listed.
    expected_ssh_dirs = [
        {
            "dest": "/home/alice/.ssh",
            "group": "alice",
            "mode": "0700",
            "owner": "alice",
        }
    ]
    assert users_defaults["users_ssh_dirs"] == expected_ssh_dirs
    assert 'loop: "{{ users_ssh_dirs | default([]) }}"' in users_tasks
    assert 'path: "{{ item.ssh_dir }}"' not in users_tasks
    assert "users_ssh_files" in users_defaults
def test_manifest_emits_flatpak_role_even_when_no_flatpaks(tmp_path): def test_manifest_emits_flatpak_role_even_when_no_flatpaks(tmp_path):
bundle = tmp_path / "bundle" bundle = tmp_path / "bundle"
out = tmp_path / "out" out = tmp_path / "out"

View file

@ -0,0 +1,91 @@
from __future__ import annotations
from enroll.cm import CMModule
from enroll.ansible import AnsibleRole
def test_ansible_role_extends_cm_module_and_normalises_service_snapshot():
    """A service snapshot is folded into the AnsibleRole's packages, paths, units and notes."""
    wants_link = "/etc/systemd/system/multi-user.target.wants/networking.service"
    unit_file = "/usr/lib/systemd/system/networking.service"
    snapshot = {
        "role_name": "networking",
        "unit": "networking.service",
        "packages": ["ifupdown"],
        "active_state": "active",
        "unit_file_state": "enabled",
        "managed_dirs": [
            {
                "path": "/etc/network",
                "owner": "root",
                "group": "root",
                "mode": "0755",
            }
        ],
        "managed_files": [
            {
                "path": "/etc/network/interfaces",
                "src_rel": "etc/network/interfaces",
                "owner": "root",
                "group": "root",
                "mode": "0644",
                "reason": "service_config",
            }
        ],
        "managed_links": [{"path": wants_link, "target": unit_file}],
        "excluded": [{"path": "/etc/network/secrets", "reason": "secret"}],
        "notes": ["captured for test"],
    }

    role = AnsibleRole("network")
    role.add_service_snapshot(snapshot)

    assert isinstance(role, CMModule)

    # Packages and managed paths.
    assert role.sorted_packages == ["ifupdown"]
    assert role.dirs["/etc/network"]["mode"] == "0755"
    assert role.files["/etc/network/interfaces"]["src_rel"] == "etc/network/interfaces"
    # The link's "target" is exposed under "src".
    assert role.links[wants_link]["src"] == unit_file

    # An active + enabled unit becomes a managed, started, enabled entry.
    expected_units = [
        {
            "name": "networking.service",
            "manage": True,
            "enabled": True,
            "state": "started",
        }
    ]
    assert role.systemd_units_var == expected_units

    # Bookkeeping carried over from the snapshot.
    assert role.excluded == [{"path": "/etc/network/secrets", "reason": "secret"}]
    assert role.notes == ["captured for test"]
    assert "service `networking.service` from role `networking`" in role.origin_lines
def test_ansible_role_normalises_package_snapshot():
    """A package snapshot contributes its package and files but no services."""
    curlrc = {
        "path": "/etc/curlrc",
        "src_rel": "etc/curlrc",
        "owner": "root",
        "group": "root",
        "mode": "0644",
    }
    snapshot = {
        "role_name": "curl",
        "package": "curl",
        "managed_files": [curlrc],
    }

    role = AnsibleRole("admin")
    role.add_package_snapshot(snapshot)

    assert isinstance(role, CMModule)
    assert role.sorted_packages == ["curl"]
    # The snapshot's "path" is exposed as "dest" on the role's file entry.
    assert role.files["/etc/curlrc"]["dest"] == "/etc/curlrc"
    assert role.services == {}
    assert role.origin_lines == ["package `curl` from role `curl`"]