diff --git a/enroll/capture.py b/enroll/capture.py new file mode 100644 index 0000000..24acc15 --- /dev/null +++ b/enroll/capture.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +import os +import shutil +import stat +from typing import List, Optional, Set + +from .fsutil import stat_triplet +from .harvest_types import ExcludedFile, ManagedFile, ManagedLink +from .ignore import IgnorePolicy +from .pathfilter import PathFilter + + +def files_differ(a: str, b: str, *, max_bytes: int = 2_000_000) -> bool: + """Return True if file ``a`` differs from file ``b``. + + Best-effort and conservative: unreadable/missing baselines, non-regular + files, and unexpectedly large files are treated as different so callers err + on the side of preserving user state. + """ + + try: + st_a = os.stat(a, follow_symlinks=True) + except OSError: + return True + + if not stat.S_ISREG(st_a.st_mode): + return True + + try: + st_b = os.stat(b, follow_symlinks=True) + except OSError: + return True + + if not stat.S_ISREG(st_b.st_mode): + return True + + if st_a.st_size != st_b.st_size: + return True + + if st_a.st_size > max_bytes: + return True + + try: + with open(a, "rb") as fa, open(b, "rb") as fb: + while True: + ca = fa.read(1024 * 64) + cb = fb.read(1024 * 64) + if ca != cb: + return True + if not ca: + return False + except OSError: + return True + + +def copy_into_bundle( + bundle_dir: str, role_name: str, abs_path: str, src_rel: str +) -> None: + dst = os.path.join(bundle_dir, "artifacts", role_name, src_rel) + os.makedirs(os.path.dirname(dst), exist_ok=True) + shutil.copy2(abs_path, dst) + + +def capture_file( + *, + bundle_dir: str, + role_name: str, + abs_path: str, + reason: str, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedFile], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]] = None, + seen_global: Optional[Set[str]] = None, + metadata: Optional[tuple[str, str, str]] = None, +) -> bool: + """Try to capture a single file into the bundle. + + Returns True if the file was copied and appended to ``managed_out``. + ``seen_role`` de-duplicates within a role; ``seen_global`` de-duplicates + across harvest stages so multiple generated roles do not manage one path. + """ + + if seen_global is not None and abs_path in seen_global: + return False + if seen_role is not None and abs_path in seen_role: + return False + + def _mark_seen() -> None: + if seen_role is not None: + seen_role.add(abs_path) + if seen_global is not None: + seen_global.add(abs_path) + + if path_filter.is_excluded(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) + _mark_seen() + return False + + deny = policy.deny_reason(abs_path) + if deny: + excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) + _mark_seen() + return False + + try: + owner, group, mode = ( + metadata if metadata is not None else stat_triplet(abs_path) + ) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + src_rel = abs_path.lstrip("/") + try: + copy_into_bundle(bundle_dir, role_name, abs_path, src_rel) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + managed_out.append( + ManagedFile( + path=abs_path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + _mark_seen() + return True + + +USER_SHELL_DOTFILES_WITH_SKEL_BASELINE = [ + (".bashrc", "user_shell_rc"), + (".profile", "user_profile"), + (".bash_logout", "user_shell_logout"), +] + +USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE = [ + (".bash_aliases", "user_shell_aliases"), +] + + +def capture_user_shell_dotfiles( + *, + bundle_dir: str, + role_name: str, + home: str, + skel_dir: str, + enabled: bool, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedFile], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]], + seen_global: Optional[Set[str]], +) -> int: + """Capture selected per-user shell dotfiles when explicitly enabled.""" + + if not enabled: + return 0 + + home = (home or "").rstrip("/") + if not home or not home.startswith("/"): + return 0 + + captured = 0 + max_compare_bytes = int(getattr(policy, "max_file_bytes", 256_000)) + + for rel, reason in USER_SHELL_DOTFILES_WITH_SKEL_BASELINE: + upath = os.path.join(home, rel) + if not os.path.isfile(upath) or os.path.islink(upath): + continue + skel_path = os.path.join(skel_dir, rel) + if not files_differ(upath, skel_path, max_bytes=max_compare_bytes): + continue + if capture_file( + bundle_dir=bundle_dir, + role_name=role_name, + abs_path=upath, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=seen_role, + seen_global=seen_global, + ): + captured += 1 + + for rel, reason in USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE: + upath = os.path.join(home, rel) + if not os.path.isfile(upath) or os.path.islink(upath): + continue + if capture_file( + bundle_dir=bundle_dir, + role_name=role_name, + abs_path=upath, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=seen_role, + seen_global=seen_global, + ): + captured += 1 + + return captured + + +def capture_link( + *, + role_name: str, + abs_path: str, + reason: str, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedLink], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]] = None, + seen_global: Optional[Set[str]] = None, +) -> bool: + """Record a symlink for later materialisation by the manifest renderer.""" + + if seen_global is not None and abs_path in seen_global: + return False + if seen_role is not None and abs_path in seen_role: + return False + + def _mark_seen() -> None: + if seen_role is not None: + seen_role.add(abs_path) + if seen_global is not None: + seen_global.add(abs_path) + + if path_filter.is_excluded(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) + _mark_seen() + return False + + deny_link = getattr(policy, "deny_reason_link", None) + if callable(deny_link): + deny = deny_link(abs_path) + else: + deny = policy.deny_reason(abs_path) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None + + if deny: + excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) + _mark_seen() + return False + + if not os.path.islink(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="not_symlink")) + _mark_seen() + return False + + try: + target = os.readlink(abs_path) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + managed_out.append(ManagedLink(path=abs_path, target=target, reason=reason)) + _mark_seen() + return True diff --git a/enroll/harvest.py b/enroll/harvest.py index 43f3a90..5ef01c8 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -1,20 +1,17 @@ from __future__ import annotations -import glob import os -from importlib import import_module import re import shutil import shlex import stat import subprocess # nosec import time -from dataclasses import dataclass, asdict, field +from dataclasses import asdict from typing import Any, Dict, List, Optional, Set, Tuple from . import accounts as _accounts from . import systemd as _systemd -from .role_names import avoid_reserved_role_name from .fsutil import stat_triplet from .platform import detect_platform, get_backend from .ignore import IgnorePolicy @@ -22,29 +19,23 @@ from .pathfilter import PathFilter from .version import get_enroll_version from .state import write_state from .harvest_collectors.context import HarvestContext +from .harvest_types import ( + EtcCustomSnapshot, + ExcludedFile, + FirewallRuntimeSnapshot, + ManagedDir, + ManagedFile, + PackageSnapshot, + ServiceSnapshot, + SysctlSnapshot, +) + +from .capture import capture_file +from . import system_paths +from .package_hints import package_section_from_installations, safe_name UnitQueryError = _systemd.UnitQueryError -_COLLECTOR_REEXPORTS = { - "CronLogrotateCollector": ".harvest_collectors.cron_logrotate", - "ExtraPathsCollector": ".harvest_collectors.paths", - "PackageManagerConfigCollector": ".harvest_collectors.package_manager", - "RuntimeStateCollector": ".harvest_collectors.runtime", - "ServicePackageCollector": ".harvest_collectors.services", - "UsersCollector": ".harvest_collectors.users", - "UsrLocalCustomCollector": ".harvest_collectors.paths", -} - - -def __getattr__(name: str) -> Any: - module_name = _COLLECTOR_REEXPORTS.get(name) - if module_name is None: - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - module = import_module(module_name, __package__) - value = getattr(module, name) - globals()[name] = value - return value - def list_enabled_services() -> List[str]: return _systemd.list_enabled_services() @@ -66,241 +57,6 @@ def collect_non_system_users() -> List[Any]: return _accounts.collect_non_system_users() -@dataclass -class ManagedFile: - path: str - src_rel: str - owner: str - group: str - mode: str - reason: str - - -@dataclass -class ManagedLink: - """A symlink we want to materialise on the target host. - - For configuration enablement patterns (e.g. sites-enabled), the symlink is - meaningful state even when the link target is captured elsewhere. - """ - - path: str - target: str - reason: str - - -@dataclass -class ManagedDir: - path: str - owner: str - group: str - mode: str - reason: str - - -@dataclass -class ExcludedFile: - path: str - reason: str - - -@dataclass -class ServiceSnapshot: - unit: str - role_name: str - packages: List[str] - active_state: Optional[str] - sub_state: Optional[str] - unit_file_state: Optional[str] - condition_result: Optional[str] - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - managed_links: List[ManagedLink] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class PackageSnapshot: - package: str - role_name: str - section: Optional[str] = None - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - managed_links: List[ManagedLink] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - has_config: bool = True # False if package has no config/systemd/cron files - - -@dataclass -class UsersSnapshot: - role_name: str - users: List[dict] - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - user_flatpaks: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) - user_flatpak_remotes: List[Dict[str, Any]] = field(default_factory=list) - - -@dataclass -class FlatpakSnapshot: - role_name: str - system_flatpaks: List[Dict[str, Any]] = field(default_factory=list) - remotes: List[Dict[str, Any]] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class SnapSnapshot: - role_name: str - system_snaps: List[Dict[str, Any]] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class AptConfigSnapshot: - role_name: str - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class DnfConfigSnapshot: - role_name: str - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class EtcCustomSnapshot: - role_name: str - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class UsrLocalCustomSnapshot: - role_name: str - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class ExtraPathsSnapshot: - role_name: str - include_patterns: List[str] = field(default_factory=list) - exclude_patterns: List[str] = field(default_factory=list) - managed_dirs: List[ManagedDir] = field(default_factory=list) - managed_files: List[ManagedFile] = field(default_factory=list) - managed_links: List[ManagedLink] = field(default_factory=list) - excluded: List[ExcludedFile] = field(default_factory=list) - notes: List[str] = field(default_factory=list) - - -@dataclass -class FirewallRuntimeSnapshot: - role_name: str - packages: List[str] = field(default_factory=list) - ipset_save: Optional[str] = None - ipset_sets: List[str] = field(default_factory=list) - iptables_v4_save: Optional[str] = None - iptables_v6_save: Optional[str] = None - notes: List[str] = field(default_factory=list) - - -@dataclass -class SysctlSnapshot: - role_name: str - managed_files: List[ManagedFile] = field(default_factory=list) - parameters: Dict[str, str] = field(default_factory=dict) - notes: List[str] = field(default_factory=list) - - -ALLOWED_UNOWNED_EXTS = { - ".cfg", - ".cnf", - ".conf", - ".ini", - ".json", - ".link", - ".mount", - ".netdev", - ".network", - ".path", - ".rules", - ".service", - ".socket", - ".target", - ".timer", - ".toml", - ".yaml", - ".yml", - "", # allow extensionless (common in /etc/default and /etc/init.d) -} - -MAX_FILES_CAP = 4000 -MAX_UNOWNED_FILES_PER_ROLE = 500 - - -def _files_differ(a: str, b: str, *, max_bytes: int = 2_000_000) -> bool: - """Return True if file `a` differs from file `b`. - - Best-effort and conservative: - - If `b` (baseline) does not exist or is not a regular file, treat as - "different" so we err on the side of capturing user state. - - If we can't stat/read either file, treat as "different" (capture will - later be filtered via IgnorePolicy). - - If files are large, avoid reading them fully. - """ - - try: - st_a = os.stat(a, follow_symlinks=True) - except OSError: - return True - - # Refuse to do content comparisons on non-regular files. - if not stat.S_ISREG(st_a.st_mode): - return True - - try: - st_b = os.stat(b, follow_symlinks=True) - except OSError: - return True - - if not stat.S_ISREG(st_b.st_mode): - return True - - if st_a.st_size != st_b.st_size: - return True - - # If it's unexpectedly big, treat as different to avoid expensive reads. - if st_a.st_size > max_bytes: - return True - - try: - with open(a, "rb") as fa, open(b, "rb") as fb: - while True: - ca = fa.read(1024 * 64) - cb = fb.read(1024 * 64) - if ca != cb: - return True - if not ca: # EOF on both - return False - except OSError: - return True - - def _merge_parent_dirs( existing_dirs: List[ManagedDir], managed_files: List[ManagedFile], @@ -376,721 +132,6 @@ def _merge_parent_dirs( return [by_path[k] for k in sorted(by_path)] -# Directories that are shared across many packages. -# Never attribute all unowned files in these trees -# to one single package. -SHARED_ETC_TOPDIRS = { - "apparmor.d", - "apt", - "cron.d", - "cron.daily", - "cron.weekly", - "cron.monthly", - "cron.hourly", - "default", - "init.d", - "logrotate.d", - "modprobe.d", - "network", - "pam.d", - "ssh", - "ssl", - "sudoers.d", - "sysctl.d", - "systemd", - # RPM-family shared trees - "dnf", - "yum", - "yum.repos.d", - "sysconfig", - "pki", - "firewalld", -} - - -def _safe_name(s: str) -> str: - out: List[str] = [] - for ch in s: - out.append(ch if ch.isalnum() or ch in ("_", "-") else "_") - return "".join(out).replace("-", "_") - - -def _role_id(raw: str) -> str: - # normalise separators first - s = re.sub(r"[^A-Za-z0-9]+", "_", raw) - # split CamelCase -> snake_case - s = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s) - s = s.lower() - s = re.sub(r"_+", "_", s).strip("_") - if not re.match(r"^[a-z_]", s): - s = "r_" + s - return s - - -def _role_name_from_unit(unit: str) -> str: - base = _role_id(unit.removesuffix(".service")) - return avoid_reserved_role_name(_safe_name(base), prefix="service") - - -def _role_name_from_pkg(pkg: str) -> str: - return avoid_reserved_role_name(_safe_name(pkg), prefix="package") - - -def _package_section_from_installations( - installs: List[Dict[str, str]], -) -> Optional[str]: - """Return a stable package grouping label from installed package metadata. - - Debian exposes this as ``Section``. RPM-family distributions have a broadly - similar ``Group`` tag, although modern Fedora/RHEL packages may omit it or - set it to ``Unspecified``. - """ - - values: Set[str] = set() - for inst in installs or []: - value = (inst.get("section") or inst.get("group") or "").strip() - if not value: - continue - if value.lower() in {"(none)", "none", "unspecified"}: - continue - values.add(value) - - if not values: - return None - return sorted(values)[0] - - -def _copy_into_bundle( - bundle_dir: str, role_name: str, abs_path: str, src_rel: str -) -> None: - dst = os.path.join(bundle_dir, "artifacts", role_name, src_rel) - os.makedirs(os.path.dirname(dst), exist_ok=True) - shutil.copy2(abs_path, dst) - - -def _capture_file( - *, - bundle_dir: str, - role_name: str, - abs_path: str, - reason: str, - policy: IgnorePolicy, - path_filter: PathFilter, - managed_out: List[ManagedFile], - excluded_out: List[ExcludedFile], - seen_role: Optional[Set[str]] = None, - seen_global: Optional[Set[str]] = None, - metadata: Optional[tuple[str, str, str]] = None, -) -> bool: - """Try to capture a single file into the bundle. - - Returns True if the file was copied (managed), False otherwise. - - * seen_role: de-dupe within a role (prevents duplicate tasks/records) - * seen_global: de-dupe across roles/stages (prevents multiple roles copying same path) - * metadata: optional (owner, group, mode) tuple to avoid re-statting - """ - - if seen_global is not None and abs_path in seen_global: - return False - if seen_role is not None and abs_path in seen_role: - return False - - def _mark_seen() -> None: - if seen_role is not None: - seen_role.add(abs_path) - if seen_global is not None: - seen_global.add(abs_path) - - if path_filter.is_excluded(abs_path): - excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) - _mark_seen() - return False - - deny = policy.deny_reason(abs_path) - if deny: - excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) - _mark_seen() - return False - - try: - owner, group, mode = ( - metadata if metadata is not None else stat_triplet(abs_path) - ) - except OSError: - excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) - _mark_seen() - return False - - src_rel = abs_path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role_name, abs_path, src_rel) - except OSError: - excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) - _mark_seen() - return False - - managed_out.append( - ManagedFile( - path=abs_path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) - ) - _mark_seen() - return True - - -USER_SHELL_DOTFILES_WITH_SKEL_BASELINE = [ - (".bashrc", "user_shell_rc"), - (".profile", "user_profile"), - (".bash_logout", "user_shell_logout"), -] - -USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE = [ - (".bash_aliases", "user_shell_aliases"), -] - - -def _capture_user_shell_dotfiles( - *, - bundle_dir: str, - role_name: str, - home: str, - skel_dir: str, - enabled: bool, - policy: IgnorePolicy, - path_filter: PathFilter, - managed_out: List[ManagedFile], - excluded_out: List[ExcludedFile], - seen_role: Optional[Set[str]], - seen_global: Optional[Set[str]], -) -> int: - """Capture selected per-user shell dotfiles when explicitly enabled. - - Shell startup files are useful for reproducing interactive accounts, but they - commonly contain exported tokens, passwords, command aliases with embedded - credentials, and other private context. For that reason, automatic capture is - gated by harvest's dangerous mode. Users who want a narrower safe-mode - selection can still use --include-path, which lands in the extra_paths role - and remains subject to IgnorePolicy content checks. - """ - - if not enabled: - return 0 - - home = (home or "").rstrip("/") - if not home or not home.startswith("/"): - return 0 - - captured = 0 - max_compare_bytes = int(getattr(policy, "max_file_bytes", 256_000)) - - for rel, reason in USER_SHELL_DOTFILES_WITH_SKEL_BASELINE: - upath = os.path.join(home, rel) - if not os.path.isfile(upath) or os.path.islink(upath): - continue - skel_path = os.path.join(skel_dir, rel) - if not _files_differ(upath, skel_path, max_bytes=max_compare_bytes): - continue - if _capture_file( - bundle_dir=bundle_dir, - role_name=role_name, - abs_path=upath, - reason=reason, - policy=policy, - path_filter=path_filter, - managed_out=managed_out, - excluded_out=excluded_out, - seen_role=seen_role, - seen_global=seen_global, - ): - captured += 1 - - for rel, reason in USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE: - upath = os.path.join(home, rel) - if not os.path.isfile(upath) or os.path.islink(upath): - continue - if _capture_file( - bundle_dir=bundle_dir, - role_name=role_name, - abs_path=upath, - reason=reason, - policy=policy, - path_filter=path_filter, - managed_out=managed_out, - excluded_out=excluded_out, - seen_role=seen_role, - seen_global=seen_global, - ): - captured += 1 - - return captured - - -def _capture_link( - *, - role_name: str, - abs_path: str, - reason: str, - policy: IgnorePolicy, - path_filter: PathFilter, - managed_out: List[ManagedLink], - excluded_out: List[ExcludedFile], - seen_role: Optional[Set[str]] = None, - seen_global: Optional[Set[str]] = None, -) -> bool: - """Try to capture a symlink into the manifest. - - NOTE: Symlinks are *not* copied into artifacts; we record their link target - and materialise them via ansible.builtin.file state=link. - """ - - if seen_global is not None and abs_path in seen_global: - return False - if seen_role is not None and abs_path in seen_role: - return False - - def _mark_seen() -> None: - if seen_role is not None: - seen_role.add(abs_path) - if seen_global is not None: - seen_global.add(abs_path) - - if path_filter.is_excluded(abs_path): - excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) - _mark_seen() - return False - - deny_link = getattr(policy, "deny_reason_link", None) - if callable(deny_link): - deny = deny_link(abs_path) - else: - # Fallback: apply deny_reason() but treat "not_regular_file" as acceptable - # for symlinks. - deny = policy.deny_reason(abs_path) - if deny in ("not_regular_file", "not_file", "not_regular"): - deny = None - - if deny: - excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) - _mark_seen() - return False - - if not os.path.islink(abs_path): - excluded_out.append(ExcludedFile(path=abs_path, reason="not_symlink")) - _mark_seen() - return False - - try: - target = os.readlink(abs_path) - except OSError: - excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) - _mark_seen() - return False - - managed_out.append(ManagedLink(path=abs_path, target=target, reason=reason)) - _mark_seen() - return True - - -def _is_confish(path: str) -> bool: - base = os.path.basename(path) - _, ext = os.path.splitext(base) - return ext in ALLOWED_UNOWNED_EXTS - - -def _hint_names(unit: str, pkgs: Set[str]) -> Set[str]: - base = unit.removesuffix(".service") - hints = {base} - if "@" in base: - hints.add(base.split("@", 1)[0]) - hints |= set(pkgs) - hints |= {h.split(".", 1)[0] for h in list(hints) if "." in h} - return {h for h in hints if h} - - -def _add_pkgs_from_etc_topdirs( - hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str] -) -> None: - """Expand a service's package set using dpkg-owned /etc top-level dirs. - - This is a heuristic: many Debian packages split a service across multiple - packages (e.g. nginx + nginx-common) while sharing a single /etc/ - tree. - - We intentionally *avoid* using shared trees (e.g. /etc/cron.d, /etc/ssl, - /etc/apparmor.d) to expand package sets, because many unrelated packages - legitimately install files there. - - We also consider the common ".d" variant (e.g. hint "apparmor" -> - topdir "apparmor.d") so we can explicitly skip known shared trees. - """ - - for h in hints: - for top in (h, f"{h}.d"): - if top in SHARED_ETC_TOPDIRS: - continue - for p in topdir_to_pkgs.get(top, set()): - pkgs.add(p) - - -def _maybe_add_specific_paths(hints: Set[str], backend) -> List[str]: - # Delegate to backend-specific conventions (e.g. /etc/default on Debian, - # /etc/sysconfig on Fedora/RHEL). Always include sysctl.d. - try: - return backend.specific_paths_for_hints(hints) - except Exception: - # Best-effort fallback (Debian-ish). - paths: List[str] = [] - for h in hints: - paths.extend( - [ - f"/etc/default/{h}", - f"/etc/init.d/{h}", - f"/etc/sysctl.d/{h}.conf", - ] - ) - return paths - - -def _scan_unowned_under_roots( - roots: List[str], - owned_etc: Set[str], - limit: int = MAX_UNOWNED_FILES_PER_ROLE, - *, - confish_only: bool = True, -) -> List[str]: - found: List[str] = [] - for root in roots: - if not os.path.isdir(root): - continue - for dirpath, _, filenames in os.walk(root): - if len(found) >= limit: - return found - for fn in filenames: - if len(found) >= limit: - return found - p = os.path.join(dirpath, fn) - if not p.startswith("/etc/"): - continue - if p in owned_etc: - continue - if not os.path.isfile(p) or os.path.islink(p): - continue - if confish_only and not _is_confish(p): - continue - found.append(p) - return found - - -def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Set[str]: - topdirs: Set[str] = set() - for path in pkg_to_etc_paths.get(pkg, []): - parts = path.split("/", 3) - if len(parts) >= 3 and parts[1] == "etc" and parts[2]: - topdirs.add(parts[2]) - return topdirs - - -# ------------------------- -# System capture helpers -# ------------------------- - -_APT_SOURCE_GLOBS = [ - "/etc/apt/sources.list", - "/etc/apt/sources.list.d/*.list", - "/etc/apt/sources.list.d/*.sources", -] - -_APT_MISC_GLOBS = [ - "/etc/apt/apt.conf", - "/etc/apt/apt.conf.d/*", - "/etc/apt/preferences", - "/etc/apt/preferences.d/*", - "/etc/apt/auth.conf", - "/etc/apt/auth.conf.d/*", - "/etc/apt/trusted.gpg", - "/etc/apt/trusted.gpg.d/*", - "/etc/apt/keyrings/*", -] - -_SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ - # mounts - ("/etc/fstab", "system_mounts"), - ("/etc/crypttab", "system_mounts"), - # sysctl / modules - ("/etc/sysctl.conf", "system_sysctl"), - ("/etc/sysctl.d/*", "system_sysctl"), - ("/etc/modprobe.d/*", "system_modprobe"), - ("/etc/modules", "system_modprobe"), - ("/etc/modules-load.d/*", "system_modprobe"), - # network - ("/etc/netplan/*", "system_network"), - ("/etc/systemd/network/*", "system_network"), - ("/etc/network/interfaces", "system_network"), - ("/etc/network/interfaces.d/*", "system_network"), - ("/etc/resolvconf.conf", "system_network"), - ("/etc/resolvconf/resolv.conf.d/*", "system_network"), - ("/etc/NetworkManager/system-connections/*", "system_network"), - ("/etc/sysconfig/network*", "system_network"), - ("/etc/sysconfig/network-scripts/*", "system_network"), - # firewall - ("/etc/nftables.conf", "system_firewall"), - ("/etc/nftables.d/*", "system_firewall"), - ("/etc/iptables/rules.v4", "system_firewall"), - ("/etc/iptables/rules.v6", "system_firewall"), - ("/etc/sysconfig/iptables", "system_firewall"), - ("/etc/sysconfig/ip6tables", "system_firewall"), - ("/etc/ipset.conf", "system_firewall"), - ("/etc/ipset/*", "system_firewall"), - ("/etc/ipset.d/*", "system_firewall"), - ("/etc/sysconfig/ipset", "system_firewall"), - ("/etc/default/ipset", "system_firewall"), - ("/etc/ufw/*", "system_firewall"), - ("/etc/default/ufw", "system_firewall"), - ("/etc/firewalld/*", "system_firewall"), - ("/etc/firewalld/zones/*", "system_firewall"), - # SELinux - ("/etc/selinux/config", "system_security"), - # other - ("/etc/rc.local", "system_rc"), -] - - -# Persistent firewall files that are treated as authoritative for their -# respective runtime state. If any matching file exists, the runtime capture -# for that family is retained only as static managed-file harvest output and -# not duplicated through the generated firewall_runtime role. -_PERSISTENT_IPTABLES_V4_GLOBS = [ - "/etc/iptables/rules.v4", - "/etc/sysconfig/iptables", -] - -_PERSISTENT_IPTABLES_V6_GLOBS = [ - "/etc/iptables/rules.v6", - "/etc/sysconfig/ip6tables", -] - -_PERSISTENT_IPSET_GLOBS = [ - "/etc/ipset.conf", - "/etc/ipset/*", - "/etc/ipset.d/*", - "/etc/sysconfig/ipset", -] - - -def _persistent_firewall_files(globs: List[str]) -> List[str]: - """Return persistent firewall files matching ``globs``. - - This intentionally uses the same file walking helper as the static system - capture path so the runtime fallback decision matches what Enroll can - harvest as managed files. - """ - seen: Set[str] = set() - out: List[str] = [] - for spec in globs: - for path in _iter_matching_files(spec): - if path in seen: - continue - seen.add(path) - out.append(path) - return sorted(out) - - -def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: - """Expand a glob spec and also walk directories to collect files.""" - out: List[str] = [] - for p in glob.glob(spec): - if len(out) >= cap: - break - if os.path.islink(p): - continue - if os.path.isfile(p): - out.append(p) - continue - if os.path.isdir(p): - for dirpath, _, filenames in os.walk(p): - for fn in filenames: - if len(out) >= cap: - break - fp = os.path.join(dirpath, fn) - if os.path.islink(fp) or not os.path.isfile(fp): - continue - out.append(fp) - if len(out) >= cap: - break - return out - - -def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: - """Return absolute keyring paths referenced via signed-by / Signed-By.""" - out: Set[str] = set() - - # deb line: deb [signed-by=/usr/share/keyrings/foo.gpg] ... - re_signed_by = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE) - # deb822: Signed-By: /usr/share/keyrings/foo.gpg - re_signed_by_hdr = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE) - - for sf in source_files: - try: - with open(sf, "r", encoding="utf-8", errors="replace") as f: - for raw in f: - line = raw.strip() - if not line or line.startswith("#"): - continue - - m = re_signed_by_hdr.match(line) - if m: - val = m.group(1).strip() - if val.startswith("|"): - continue - toks = re.split(r"[\s,]+", val) - for t in toks: - if t.startswith("/"): - out.add(t) - continue - - # Try bracketed options first (common for .list files) - if "[" in line and "]" in line: - bracket = line.split("[", 1)[1].split("]", 1)[0] - for mm in re_signed_by.finditer(bracket): - val = mm.group(1).strip().strip("\"'") - for t in re.split(r"[\s,]+", val): - if t.startswith("/"): - out.add(t) - continue - - # Fallback: signed-by= in whole line - for mm in re_signed_by.finditer(line): - val = mm.group(1).strip().strip("\"'") - for t in re.split(r"[\s,]+", val): - if t.startswith("/"): - out.add(t) - except OSError: - continue - - return out - - -def _iter_apt_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for APT configuration. - - This captures the full /etc/apt tree (subject to IgnorePolicy at copy time), - plus any keyrings referenced via signed-by/Signed-By which may live outside - /etc (e.g. /usr/share/keyrings). - """ - reasons: Dict[str, str] = {} - - # Capture all regular files under /etc/apt (no symlinks). - if os.path.isdir("/etc/apt"): - for dirpath, _, filenames in os.walk("/etc/apt"): - for fn in filenames: - p = os.path.join(dirpath, fn) - if os.path.islink(p) or not os.path.isfile(p): - continue - reasons.setdefault(p, "apt_config") - - # Identify source files explicitly for nicer reasons and keyring discovery. - apt_sources: List[str] = [] - for g in _APT_SOURCE_GLOBS: - apt_sources.extend(_iter_matching_files(g)) - for p in sorted(set(apt_sources)): - reasons[p] = "apt_source" - - # Keyrings in standard locations. - for g in ( - "/etc/apt/trusted.gpg", - "/etc/apt/trusted.gpg.d/*", - "/etc/apt/keyrings/*", - ): - for p in _iter_matching_files(g): - reasons[p] = "apt_keyring" - - # Keyrings referenced by sources (may live outside /etc/apt). - signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) - for p in sorted(signed_by): - if os.path.islink(p) or not os.path.isfile(p): - continue - if p.startswith("/etc/apt/"): - reasons[p] = "apt_keyring" - else: - reasons[p] = "apt_signed_by_keyring" - - # De-dup with stable ordering. - uniq: List[tuple[str, str]] = [] - for p in sorted(reasons.keys()): - uniq.append((p, reasons[p])) - return uniq - - -def _iter_dnf_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for DNF/YUM configuration on RPM systems. - - Captures: - - /etc/dnf/* (dnf.conf, vars, plugins, modules, automatic) - - /etc/yum.conf (legacy) - - /etc/yum.repos.d/*.repo - - /etc/pki/rpm-gpg/* (GPG key files) - """ - reasons: Dict[str, str] = {} - - for root, tag in ( - ("/etc/dnf", "dnf_config"), - ("/etc/yum", "yum_config"), - ): - if os.path.isdir(root): - for dirpath, _, filenames in os.walk(root): - for fn in filenames: - p = os.path.join(dirpath, fn) - if os.path.islink(p) or not os.path.isfile(p): - continue - reasons.setdefault(p, tag) - - # Legacy yum.conf. - if os.path.isfile("/etc/yum.conf") and not os.path.islink("/etc/yum.conf"): - reasons.setdefault("/etc/yum.conf", "yum_conf") - - # Repositories. - if os.path.isdir("/etc/yum.repos.d"): - for p in _iter_matching_files("/etc/yum.repos.d/*.repo"): - reasons[p] = "yum_repo" - - # RPM GPG keys. - if os.path.isdir("/etc/pki/rpm-gpg"): - for dirpath, _, filenames in os.walk("/etc/pki/rpm-gpg"): - for fn in filenames: - p = os.path.join(dirpath, fn) - if os.path.islink(p) or not os.path.isfile(p): - continue - reasons.setdefault(p, "rpm_gpg_key") - - # Stable ordering. - return [(p, reasons[p]) for p in sorted(reasons.keys())] - - -def _iter_system_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for essential system config/state (non-APT).""" - out: List[tuple[str, str]] = [] - - for spec, reason in _SYSTEM_CAPTURE_GLOBS: - for p in _iter_matching_files(spec): - out.append((p, reason)) - - # De-dup while preserving first reason - seen: Set[str] = set() - uniq: List[tuple[str, str]] = [] - for p, r in out: - if p in seen: - continue - seen.add(p) - uniq.append((p, r)) - return uniq - - _FIREWALL_CAPTURE_COMMANDS: Dict[str, Tuple[str, ...]] = { "ipset_save": ("ipset", "save"), "iptables_v4_save": ("iptables-save",), @@ -1539,12 +580,14 @@ def harvest( installed_pkgs = backend.installed_packages() or {} installed_names: Set[str] = set(installed_pkgs.keys()) - persistent_ipset_files = _persistent_firewall_files(_PERSISTENT_IPSET_GLOBS) - persistent_iptables_v4_files = _persistent_firewall_files( - _PERSISTENT_IPTABLES_V4_GLOBS + persistent_ipset_files = system_paths.persistent_firewall_files( + system_paths.persistent_ipset_globs() ) - persistent_iptables_v6_files = _persistent_firewall_files( - _PERSISTENT_IPTABLES_V6_GLOBS + persistent_iptables_v4_files = system_paths.persistent_firewall_files( + system_paths.persistent_iptables_v4_globs() + ) + persistent_iptables_v6_files = system_paths.persistent_firewall_files( + system_paths.persistent_iptables_v6_globs() ) context = HarvestContext( @@ -1644,7 +687,7 @@ def harvest( alias_ranked: Dict[str, tuple[int, str]] = {} def _add_alias(alias: str, role_name: str, *, priority: int) -> None: - key = _safe_name(alias) + key = safe_name(alias) if not key: return cur = alias_ranked.get(key) @@ -1705,12 +748,12 @@ def harvest( if len(svc_roles) > 1: # Direct role-name matches first. for c in [pkg, *uniq]: - rn = _safe_name(c) + rn = safe_name(c) if rn in svc_roles: return (rn, tag) # Next, use the alias map if it points at one of the roles. for c in [pkg, *uniq]: - hit = alias_ranked.get(_safe_name(c)) + hit = alias_ranked.get(safe_name(c)) if hit is not None and hit[1] in svc_roles: return (hit[1], tag) @@ -1721,7 +764,7 @@ def harvest( return (pkg_role, tag) for c in uniq: - key = _safe_name(c) + key = safe_name(c) hit = alias_ranked.get(key) if hit is not None: return (hit[1], tag) @@ -1740,7 +783,7 @@ def harvest( # Capture essential system config/state (even if package-owned). etc_role_seen = seen_by_role.setdefault(etc_role_name, set()) - for path, reason in _iter_system_capture_paths(): + for path, reason in system_paths.iter_system_capture_paths(): if path in already: continue @@ -1754,7 +797,7 @@ def harvest( managed_out, excluded_out = (etc_managed, etc_excluded) role_seen = etc_role_seen - _capture_file( + capture_file( bundle_dir=bundle_dir, role_name=role_for_copy, abs_path=path, @@ -1780,7 +823,7 @@ def harvest( continue if not os.path.isfile(path) or os.path.islink(path): continue - if not _is_confish(path): + if not system_paths.is_confish(path): continue target = _target_role_for_shared_snippet(path) @@ -1793,7 +836,7 @@ def harvest( managed_out, excluded_out = (etc_managed, etc_excluded) role_seen = etc_role_seen - if _capture_file( + if capture_file( bundle_dir=bundle_dir, role_name=role_for_copy, abs_path=path, @@ -1806,12 +849,12 @@ def harvest( seen_global=captured_global, ): scanned += 1 - if scanned >= MAX_FILES_CAP: + if scanned >= system_paths.MAX_FILES_CAP: etc_notes.append( - f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." + f"Reached file cap ({system_paths.MAX_FILES_CAP}) while scanning /etc for unowned files." ) break - if scanned >= MAX_FILES_CAP: + if scanned >= system_paths.MAX_FILES_CAP: break etc_custom_snapshot = EtcCustomSnapshot( @@ -1874,7 +917,7 @@ def harvest( arches = sorted({i.get("arch") for i in installs if i.get("arch")}) vers = sorted({i.get("version") for i in installs if i.get("version")}) version: Optional[str] = vers[0] if len(vers) == 1 else None - section = _package_section_from_installations(installs) + section = package_section_from_installations(installs) observed: List[Dict[str, str]] = [] if pkg in manual_set: diff --git a/enroll/harvest_collectors/cron_logrotate.py b/enroll/harvest_collectors/cron_logrotate.py index c66fd3d..c40c4a1 100644 --- a/enroll/harvest_collectors/cron_logrotate.py +++ b/enroll/harvest_collectors/cron_logrotate.py @@ -4,8 +4,10 @@ import os from dataclasses import dataclass from typing import List, Optional, Set -from .. import harvest as h -from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot +from ..capture import capture_file +from ..harvest_types import ExcludedFile, ManagedFile, PackageSnapshot +from ..package_hints import package_section_from_installations +from ..system_paths import iter_matching_files from .context import HarvestCollector @@ -97,10 +99,10 @@ class CronLogrotateCollector(HarvestCollector): seen: Set[str] = set() for spec in _CRON_CAPTURE_GLOBS: - for path in h._iter_matching_files(spec): + for path in iter_matching_files(spec): if not os.path.isfile(path) or os.path.islink(path): continue - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=self.cron_role_name, abs_path=path, @@ -116,7 +118,7 @@ class CronLogrotateCollector(HarvestCollector): return PackageSnapshot( package=cron_pkg, role_name=self.cron_role_name, - section=h._package_section_from_installations( + section=package_section_from_installations( self.context.installed_pkgs.get(cron_pkg, []) ), managed_files=managed, @@ -131,10 +133,10 @@ class CronLogrotateCollector(HarvestCollector): seen: Set[str] = set() for spec in _LOGROTATE_CAPTURE_GLOBS: - for path in h._iter_matching_files(spec): + for path in iter_matching_files(spec): if not os.path.isfile(path) or os.path.islink(path): continue - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=self.logrotate_role_name, abs_path=path, @@ -150,7 +152,7 @@ class CronLogrotateCollector(HarvestCollector): return PackageSnapshot( package=logrotate_pkg, role_name=self.logrotate_role_name, - section=h._package_section_from_installations( + section=package_section_from_installations( self.context.installed_pkgs.get(logrotate_pkg, []) ), managed_files=managed, diff --git a/enroll/harvest_collectors/package_manager.py b/enroll/harvest_collectors/package_manager.py index 09c270b..0cbeb03 100644 --- a/enroll/harvest_collectors/package_manager.py +++ b/enroll/harvest_collectors/package_manager.py @@ -3,8 +3,14 @@ from __future__ import annotations from dataclasses import dataclass from typing import Dict, List, Set -from .. import harvest as h -from ..harvest import AptConfigSnapshot, DnfConfigSnapshot, ExcludedFile, ManagedFile +from ..capture import capture_file +from ..harvest_types import ( + AptConfigSnapshot, + DnfConfigSnapshot, + ExcludedFile, + ManagedFile, +) +from ..system_paths import iter_apt_capture_paths, iter_dnf_capture_paths from .context import HarvestCollector, HarvestContext @@ -36,8 +42,8 @@ class PackageManagerConfigCollector(HarvestCollector): if self.context.backend.name == "dpkg": apt_role_seen = self.seen_by_role.setdefault(apt_role_name, set()) - for path, reason in h._iter_apt_capture_paths(): - h._capture_file( + for path, reason in iter_apt_capture_paths(): + capture_file( bundle_dir=self.context.bundle_dir, role_name=apt_role_name, abs_path=path, @@ -51,8 +57,8 @@ class PackageManagerConfigCollector(HarvestCollector): ) elif self.context.backend.name == "rpm": dnf_role_seen = self.seen_by_role.setdefault(dnf_role_name, set()) - for path, reason in h._iter_dnf_capture_paths(): - h._capture_file( + for path, reason in iter_dnf_capture_paths(): + capture_file( bundle_dir=self.context.bundle_dir, role_name=dnf_role_name, abs_path=path, diff --git a/enroll/harvest_collectors/paths.py b/enroll/harvest_collectors/paths.py index af9fdbe..f11896a 100644 --- a/enroll/harvest_collectors/paths.py +++ b/enroll/harvest_collectors/paths.py @@ -5,13 +5,15 @@ import os from typing import Dict, List, Optional, Set from .. import harvest as h -from ..harvest import ( +from ..capture import capture_file +from ..harvest_types import ( ExcludedFile, ExtraPathsSnapshot, ManagedDir, ManagedFile, UsrLocalCustomSnapshot, ) +from ..system_paths import MAX_FILES_CAP from ..pathfilter import expand_includes from .context import HarvestCollector, HarvestContext @@ -38,13 +40,13 @@ class UsrLocalCustomCollector(HarvestCollector): self._scan_tree( "/usr/local/etc", require_executable=False, - cap=h.MAX_FILES_CAP, + cap=MAX_FILES_CAP, reason="usr_local_etc_custom", ) self._scan_tree( "/usr/local/bin", require_executable=True, - cap=h.MAX_FILES_CAP, + cap=MAX_FILES_CAP, reason="usr_local_bin_script", ) return UsrLocalCustomSnapshot( @@ -86,7 +88,7 @@ class UsrLocalCustomCollector(HarvestCollector): except ValueError: continue - if h._capture_file( + if capture_file( bundle_dir=self.context.bundle_dir, role_name=self.role_name, abs_path=path, @@ -147,7 +149,7 @@ class ExtraPathsCollector(HarvestCollector): files, inc_notes = expand_includes( self.context.path_filter.iter_include_patterns(), exclude=self.context.path_filter, - max_files=h.MAX_FILES_CAP, + max_files=MAX_FILES_CAP, ) included_files = files self.notes.extend(inc_notes) @@ -156,7 +158,7 @@ class ExtraPathsCollector(HarvestCollector): for path in included_files: if path in self.already_all: continue - if h._capture_file( + if capture_file( bundle_dir=self.context.bundle_dir, role_name=self.role_name, abs_path=path, @@ -198,9 +200,9 @@ class ExtraPathsCollector(HarvestCollector): if not os.path.isdir(root) or os.path.islink(root): return for dirpath, dirnames, _ in os.walk(root, followlinks=False): - if len(self.managed_dirs) >= h.MAX_FILES_CAP: + if len(self.managed_dirs) >= MAX_FILES_CAP: self.notes.append( - f"Reached directory cap ({h.MAX_FILES_CAP}) while scanning {root}." + f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}." ) return dirpath = os.path.normpath(dirpath) diff --git a/enroll/harvest_collectors/runtime.py b/enroll/harvest_collectors/runtime.py index 2d1eafa..c16f9da 100644 --- a/enroll/harvest_collectors/runtime.py +++ b/enroll/harvest_collectors/runtime.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from typing import List, Optional from .. import harvest as h -from ..harvest import FirewallRuntimeSnapshot, SysctlSnapshot +from ..harvest_types import FirewallRuntimeSnapshot, SysctlSnapshot from .context import HarvestCollector, HarvestContext diff --git a/enroll/harvest_collectors/services.py b/enroll/harvest_collectors/services.py index a962fe3..2b087df 100644 --- a/enroll/harvest_collectors/services.py +++ b/enroll/harvest_collectors/services.py @@ -6,7 +6,23 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Set from .. import harvest as h -from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot, ServiceSnapshot +from ..capture import capture_file, capture_link +from ..harvest_types import ExcludedFile, ManagedFile, PackageSnapshot, ServiceSnapshot +from ..package_hints import ( + SHARED_ETC_TOPDIRS, + add_pkgs_from_etc_topdirs, + hint_names, + maybe_add_specific_paths, + package_section_from_installations, + role_name_from_pkg, + role_name_from_unit, +) +from ..system_paths import ( + MAX_UNOWNED_FILES_PER_ROLE, + is_confish, + scan_unowned_under_roots, + topdirs_for_package, +) from ..systemd import UnitQueryError from .context import HarvestCollector, HarvestContext from .cron_logrotate import CronLogrotateCollector, _is_cron_path, _is_logrotate_path @@ -80,7 +96,7 @@ class ServicePackageCollector(HarvestCollector): enabled_services = [ u for u in enabled_services - if h._role_name_from_unit(u) not in blocked_roles + if role_name_from_unit(u) not in blocked_roles ] enabled_set = set(enabled_services) @@ -106,15 +122,15 @@ class ServicePackageCollector(HarvestCollector): } for unit in sorted(enabled_services, key=service_sort_key): - role = h._role_name_from_unit(unit) + role = role_name_from_unit(unit) parent_unit = parent_unit_for.get(unit) - parent_role = h._role_name_from_unit(parent_unit) if parent_unit else None + parent_role = role_name_from_unit(parent_unit) if parent_unit else None try: ui = h.get_unit_info(unit) except UnitQueryError as e: self.service_role_aliases.setdefault( - role, h._hint_names(unit, set()) | {role} + role, hint_names(unit, set()) | {role} ) self.seen_by_role.setdefault(role, set()) managed = self.managed_by_role.setdefault(role, []) @@ -164,11 +180,11 @@ class ServicePackageCollector(HarvestCollector): elif env_file.startswith("/etc/") and os.path.isfile(env_file): candidates[env_file] = "systemd_envfile" - hints = h._hint_names(unit, pkgs) - h._add_pkgs_from_etc_topdirs(hints, self.context.topdir_to_pkgs, pkgs) + hints = hint_names(unit, pkgs) + add_pkgs_from_etc_topdirs(hints, self.context.topdir_to_pkgs, pkgs) self.service_role_aliases[role] = set(hints) | set(pkgs) | {role} - for sp in h._maybe_add_specific_paths(hints, backend): + for sp in maybe_add_specific_paths(hints, backend): if not os.path.exists(sp): continue if sp in self.context.etc_owner_map: @@ -193,26 +209,26 @@ class ServicePackageCollector(HarvestCollector): confish_roots: List[str] = [] for hint in hints: roots_for_hint = [f"/etc/{hint}", f"/etc/{hint}.d"] - if hint in h.SHARED_ETC_TOPDIRS: + if hint in SHARED_ETC_TOPDIRS: confish_roots.extend(roots_for_hint) else: any_roots.extend(roots_for_hint) found: List[str] = [] found.extend( - h._scan_unowned_under_roots( + scan_unowned_under_roots( any_roots, self.context.owned_etc, - limit=h.MAX_UNOWNED_FILES_PER_ROLE, + limit=MAX_UNOWNED_FILES_PER_ROLE, confish_only=False, ) ) - if len(found) < h.MAX_UNOWNED_FILES_PER_ROLE: + if len(found) < MAX_UNOWNED_FILES_PER_ROLE: found.extend( - h._scan_unowned_under_roots( + scan_unowned_under_roots( confish_roots, self.context.owned_etc, - limit=h.MAX_UNOWNED_FILES_PER_ROLE - len(found), + limit=MAX_UNOWNED_FILES_PER_ROLE - len(found), confish_only=True, ) ) @@ -236,7 +252,7 @@ class ServicePackageCollector(HarvestCollector): dest_managed = self.managed_by_role.setdefault(dest_role, []) dest_excluded = self.excluded_by_role.setdefault(dest_role, []) dest_seen = self.seen_by_role.setdefault(dest_role, set()) - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=dest_role, abs_path=path, @@ -305,7 +321,7 @@ class ServicePackageCollector(HarvestCollector): if snap is not None: role_seen = self.seen_by_role.setdefault(snap.role_name, set()) for path in timer_paths: - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=snap.role_name, abs_path=path, @@ -374,7 +390,7 @@ class ServicePackageCollector(HarvestCollector): manual_pkgs_skipped.append(pkg) continue - role = h._role_name_from_pkg(pkg) + role = role_name_from_pkg(pkg) notes: List[str] = [] excluded: List[ExcludedFile] = [] managed: List[ManagedFile] = [] @@ -395,19 +411,19 @@ class ServicePackageCollector(HarvestCollector): continue candidates.setdefault(path, reason) - topdirs = h._topdirs_for_package(pkg, self.context.pkg_to_etc_paths) + topdirs = topdirs_for_package(pkg, self.context.pkg_to_etc_paths) roots: List[str] = [] for topdir in sorted(topdirs): - if topdir in h.SHARED_ETC_TOPDIRS: + if topdir in SHARED_ETC_TOPDIRS: continue if backend.is_pkg_config_path( f"/etc/{topdir}/" ) or backend.is_pkg_config_path(f"/etc/{topdir}"): continue roots.extend([f"/etc/{topdir}", f"/etc/{topdir}.d"]) - roots.extend(h._maybe_add_specific_paths(set(topdirs), backend)) + roots.extend(maybe_add_specific_paths(set(topdirs), backend)) - for pth in h._scan_unowned_under_roots( + for pth in scan_unowned_under_roots( [r for r in roots if os.path.isdir(r)], self.context.owned_etc, confish_only=False, @@ -416,12 +432,12 @@ class ServicePackageCollector(HarvestCollector): for root in roots: if os.path.isfile(root) and not os.path.islink(root): - if root not in self.context.owned_etc and h._is_confish(root): + if root not in self.context.owned_etc and is_confish(root): candidates.setdefault(root, "custom_specific_path") role_seen = self.seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=role, abs_path=path, @@ -445,7 +461,7 @@ class ServicePackageCollector(HarvestCollector): PackageSnapshot( package=pkg, role_name=role, - section=h._package_section_from_installations( + section=package_section_from_installations( self.context.installed_pkgs.get(pkg, []) ), managed_files=managed, @@ -490,7 +506,7 @@ class ServicePackageCollector(HarvestCollector): for pth in sorted(glob.glob(os.path.join(directory, "*"))): if not os.path.islink(pth): continue - h._capture_link( + capture_link( role_name=role_name, abs_path=pth, reason="enabled_symlink", diff --git a/enroll/harvest_collectors/users.py b/enroll/harvest_collectors/users.py index 7640d1f..d1e86fe 100644 --- a/enroll/harvest_collectors/users.py +++ b/enroll/harvest_collectors/users.py @@ -4,7 +4,8 @@ from dataclasses import asdict, dataclass from typing import Any, Dict, List, Set from .. import harvest as h -from ..harvest import ( +from ..capture import capture_file, capture_user_shell_dotfiles +from ..harvest_types import ( ExcludedFile, FlatpakSnapshot, ManagedFile, @@ -104,7 +105,7 @@ class UsersCollector(HarvestCollector): if ssh_file.endswith("/authorized_keys") else "ssh_public_key" ) - h._capture_file( + capture_file( bundle_dir=self.context.bundle_dir, role_name=users_role_name, abs_path=ssh_file, @@ -121,7 +122,7 @@ class UsersCollector(HarvestCollector): # often contain exported tokens or aliases/functions with embedded secrets. home = (user.home or "").rstrip("/") if home and home.startswith("/"): - h._capture_user_shell_dotfiles( + capture_user_shell_dotfiles( bundle_dir=self.context.bundle_dir, role_name=users_role_name, home=home, diff --git a/enroll/harvest_types.py b/enroll/harvest_types.py new file mode 100644 index 0000000..ba5bf77 --- /dev/null +++ b/enroll/harvest_types.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class ManagedFile: + path: str + src_rel: str + owner: str + group: str + mode: str + reason: str + + +@dataclass +class ManagedLink: + """A symlink we want to materialise on the target host. + + For configuration enablement patterns (e.g. sites-enabled), the symlink is + meaningful state even when the link target is captured elsewhere. + """ + + path: str + target: str + reason: str + + +@dataclass +class ManagedDir: + path: str + owner: str + group: str + mode: str + reason: str + + +@dataclass +class ExcludedFile: + path: str + reason: str + + +@dataclass +class ServiceSnapshot: + unit: str + role_name: str + packages: List[str] + active_state: Optional[str] + sub_state: Optional[str] + unit_file_state: Optional[str] + condition_result: Optional[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + managed_links: List[ManagedLink] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class PackageSnapshot: + package: str + role_name: str + section: Optional[str] = None + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + managed_links: List[ManagedLink] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + has_config: bool = True # False if package has no config/systemd/cron files + + +@dataclass +class UsersSnapshot: + role_name: str + users: List[dict] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + user_flatpaks: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict) + user_flatpak_remotes: List[Dict[str, Any]] = field(default_factory=list) + + +@dataclass +class FlatpakSnapshot: + role_name: str + system_flatpaks: List[Dict[str, Any]] = field(default_factory=list) + remotes: List[Dict[str, Any]] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class SnapSnapshot: + role_name: str + system_snaps: List[Dict[str, Any]] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class AptConfigSnapshot: + role_name: str + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class DnfConfigSnapshot: + role_name: str + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class EtcCustomSnapshot: + role_name: str + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class UsrLocalCustomSnapshot: + role_name: str + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class ExtraPathsSnapshot: + role_name: str + include_patterns: List[str] = field(default_factory=list) + exclude_patterns: List[str] = field(default_factory=list) + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + managed_links: List[ManagedLink] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) + + +@dataclass +class FirewallRuntimeSnapshot: + role_name: str + packages: List[str] = field(default_factory=list) + ipset_save: Optional[str] = None + ipset_sets: List[str] = field(default_factory=list) + iptables_v4_save: Optional[str] = None + iptables_v6_save: Optional[str] = None + notes: List[str] = field(default_factory=list) + + +@dataclass +class SysctlSnapshot: + role_name: str + managed_files: List[ManagedFile] = field(default_factory=list) + parameters: Dict[str, str] = field(default_factory=dict) + notes: List[str] = field(default_factory=list) diff --git a/enroll/package_hints.py b/enroll/package_hints.py new file mode 100644 index 0000000..b710ed2 --- /dev/null +++ b/enroll/package_hints.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import re +from typing import Dict, List, Optional, Set + +from .role_names import avoid_reserved_role_name + + +# Directories that are shared across many packages. Never attribute all unowned +# files in these trees to one single package. +SHARED_ETC_TOPDIRS = { + "apparmor.d", + "apt", + "cron.d", + "cron.daily", + "cron.weekly", + "cron.monthly", + "cron.hourly", + "default", + "init.d", + "logrotate.d", + "modprobe.d", + "network", + "pam.d", + "ssh", + "ssl", + "sudoers.d", + "sysctl.d", + "systemd", + # RPM-family shared trees + "dnf", + "yum", + "yum.repos.d", + "sysconfig", + "pki", + "firewalld", +} + + +def safe_name(s: str) -> str: + out: List[str] = [] + for ch in s: + out.append(ch if ch.isalnum() or ch in ("_", "-") else "_") + return "".join(out).replace("-", "_") + + +def role_id(raw: str) -> str: + # normalise separators first + s = re.sub(r"[^A-Za-z0-9]+", "_", raw) + # split CamelCase -> snake_case + s = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s) + s = s.lower() + s = re.sub(r"_+", "_", s).strip("_") + if not re.match(r"^[a-z_]", s): + s = "r_" + s + return s + + +def role_name_from_unit(unit: str) -> str: + base = role_id(unit.removesuffix(".service")) + return avoid_reserved_role_name(safe_name(base), prefix="service") + + +def role_name_from_pkg(pkg: str) -> str: + return avoid_reserved_role_name(safe_name(pkg), prefix="package") + + +def package_section_from_installations( + installs: List[Dict[str, str]], +) -> Optional[str]: + """Return a stable package grouping label from installed package metadata.""" + + values: Set[str] = set() + for inst in installs or []: + value = (inst.get("section") or inst.get("group") or "").strip() + if not value: + continue + if value.lower() in {"(none)", "none", "unspecified"}: + continue + values.add(value) + + if not values: + return None + return sorted(values)[0] + + +def hint_names(unit: str, pkgs: Set[str]) -> Set[str]: + base = unit.removesuffix(".service") + hints = {base} + if "@" in base: + hints.add(base.split("@", 1)[0]) + hints |= set(pkgs) + hints |= {h.split(".", 1)[0] for h in list(hints) if "." in h} + return {h for h in hints if h} + + +def add_pkgs_from_etc_topdirs( + hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str] +) -> None: + """Expand a service's package set using package-owned /etc top-level dirs.""" + + for h in hints: + for top in (h, f"{h}.d"): + if top in SHARED_ETC_TOPDIRS: + continue + for p in topdir_to_pkgs.get(top, set()): + pkgs.add(p) + + +def maybe_add_specific_paths(hints: Set[str], backend) -> List[str]: + # Delegate to backend-specific conventions (e.g. /etc/default on Debian, + # /etc/sysconfig on Fedora/RHEL). Always include sysctl.d. + try: + return backend.specific_paths_for_hints(hints) + except Exception: + # Best-effort fallback (Debian-ish). + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths diff --git a/enroll/puppet.py b/enroll/puppet.py index 2993e02..4fa059a 100644 --- a/enroll/puppet.py +++ b/enroll/puppet.py @@ -4,7 +4,9 @@ import json import re import shutil from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple + +import yaml from .cm import ( CMModule, @@ -87,6 +89,7 @@ class PuppetRole(CMModule): bundle_dir: str, artifact_role: str, module_files_dir: Path, + file_prefix: Optional[str] = None, ) -> None: for d in self.managed_dirs_from_snapshot(snap): path = str(d.get("path") or "").strip() @@ -104,7 +107,11 @@ class PuppetRole(CMModule): if not path or not src_rel: continue module_rel = _copy_artifact( - bundle_dir, artifact_role, src_rel, module_files_dir + bundle_dir, + artifact_role, + src_rel, + module_files_dir, + dst_prefix=file_prefix, ) if not module_rel: self.notes.append( @@ -203,17 +210,23 @@ def _resource( def _copy_artifact( - bundle_dir: str, role: str, src_rel: str, dst_files_dir: Path + bundle_dir: str, + role: str, + src_rel: str, + dst_files_dir: Path, + *, + dst_prefix: Optional[str] = None, ) -> Optional[str]: if not role or not src_rel: return None src = Path(bundle_dir) / "artifacts" / role / src_rel if not src.is_file(): return None - dst = dst_files_dir / src_rel + module_rel = Path(dst_prefix or "") / src_rel + dst = dst_files_dir / module_rel dst.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dst) - return Path(src_rel).as_posix() + return module_rel.as_posix() def _source_uri(module_name: str, module_rel: str) -> str: @@ -237,6 +250,21 @@ def _add_flatpak_snap_notes(roles: Dict[str, Any], out: Dict[str, PuppetRole]) - ) +def _node_data_filename(fqdn: str) -> str: + """Return a safe Hiera node-data filename for an FQDN/certname.""" + + name = str(fqdn or "").strip().replace("/", "_").replace("\\", "_") + return f"{name or 'node'}.yaml" + + +def _node_file_prefix(fqdn: str) -> str: + """Return a safe module-files prefix for node-specific artifacts.""" + + name = re.sub(r"[^A-Za-z0-9_.-]+", "_", str(fqdn or "").strip()) + name = name.strip("._-") or "node" + return f"nodes/{name}" + + def _collect_puppet_roles( state: Dict[str, Any], bundle_dir: str, @@ -248,6 +276,7 @@ def _collect_puppet_roles( roles = roles_from_state(state) inventory_packages = inventory_packages_from_state(state) use_common_modules = not fqdn and not no_common_roles + node_file_prefix = _node_file_prefix(fqdn) if fqdn else None out: Dict[str, PuppetRole] = {} def ensure_role(role_name: str) -> PuppetRole: @@ -275,6 +304,7 @@ def _collect_puppet_roles( bundle_dir=bundle_dir, artifact_role=str(snap.get("role_name") or key), module_files_dir=module_files_dir, + file_prefix=node_file_prefix, ) users_snap = roles.get("users") or {} @@ -289,6 +319,7 @@ def _collect_puppet_roles( bundle_dir=bundle_dir, artifact_role=str(users_snap.get("role_name") or "users"), module_files_dir=modules_dir / prole.module_name / "files", + file_prefix=node_file_prefix, ) for svc in roles.get("services", []) or []: @@ -319,6 +350,7 @@ def _collect_puppet_roles( bundle_dir=bundle_dir, artifact_role=str(svc.get("role_name") or original_role_name), module_files_dir=modules_dir / prole.module_name / "files", + file_prefix=node_file_prefix, ) for pkg in roles.get("packages", []) or []: @@ -342,6 +374,7 @@ def _collect_puppet_roles( bundle_dir=bundle_dir, artifact_role=str(pkg.get("role_name") or original_role_name), module_files_dir=modules_dir / prole.module_name / "files", + file_prefix=node_file_prefix, ) fw = roles.get("firewall_runtime") or {} @@ -489,6 +522,164 @@ def _render_role_class(prole: PuppetRole) -> str: return "\n".join(lines) +def _attrs_with_ensure(attrs: Dict[str, Any], ensure: str) -> Dict[str, Any]: + out = {"ensure": ensure} + out.update(attrs) + return out + + +def _role_hiera_values(prole: PuppetRole) -> Dict[str, Any]: + """Return Automatic Parameter Lookup data for one generated module.""" + + data: Dict[str, Any] = {} + prefix = f"{prole.module_name}::" + + if prole.packages: + data[f"{prefix}packages"] = sorted(prole.packages) + + if prole.groups: + data[f"{prefix}groups"] = { + group: {"ensure": "present"} for group in sorted(prole.groups) + } + + if prole.users: + users: Dict[str, Dict[str, Any]] = {} + for name in sorted(prole.users): + user = prole.users[name] + attrs: Dict[str, Any] = {"ensure": "present", "managehome": True} + if user.get("uid") is not None: + attrs["uid"] = user["uid"] + if user.get("primary_group"): + attrs["gid"] = user["primary_group"] + if user.get("home"): + attrs["home"] = user["home"] + if user.get("shell"): + attrs["shell"] = user["shell"] + if user.get("gecos"): + attrs["comment"] = user["gecos"] + if user.get("supplementary_groups"): + attrs["groups"] = list(user["supplementary_groups"]) + attrs["membership"] = "minimum" + users[name] = attrs + data[f"{prefix}users"] = users + + if prole.dirs: + data[f"{prefix}dirs"] = { + path: _attrs_with_ensure(prole.dirs[path], "directory") + for path in sorted(prole.dirs) + } + + if prole.files: + data[f"{prefix}files"] = { + path: _attrs_with_ensure(prole.files[path], "file") + for path in sorted(prole.files) + } + + if prole.links: + data[f"{prefix}links"] = { + path: _attrs_with_ensure(prole.links[path], "link") + for path in sorted(prole.links) + } + + if prole.services: + data[f"{prefix}services"] = { + name: { + "ensure": prole.services[name].get("ensure") or "stopped", + "enable": bool(prole.services[name].get("enable")), + } + for name in sorted(prole.services) + } + + if prole.notes: + data[f"{prefix}notes"] = list(prole.notes) + + if "/etc/sysctl.d/99-enroll.conf" in prole.files: + data[f"{prefix}sysctl_apply"] = True + data[f"{prefix}sysctl_ignore_apply_errors"] = True + + return data + + +def _render_hiera_role_class(prole: PuppetRole) -> str: + """Render a reusable, data-driven Puppet class for --fqdn/Hiera mode.""" + + lines: List[str] = [ + "# Generated by Enroll from harvest state.", + "# Resource data is supplied by Hiera Automatic Parameter Lookup.", + f"class {prole.module_name} (", + " Array[String] $packages = [],", + " Hash[String, Hash] $groups = {},", + " Hash[String, Hash] $users = {},", + " Hash[String, Hash] $dirs = {},", + " Hash[String, Hash] $files = {},", + " Hash[String, Hash] $links = {},", + " Hash[String, Hash] $services = {},", + " Array[String] $notes = [],", + " Boolean $sysctl_apply = true,", + " Boolean $sysctl_ignore_apply_errors = true,", + ") {", + "", + " $packages.each |String $package_name| {", + " package { $package_name:", + " ensure => 'installed',", + " }", + " }", + "", + " $groups.each |String $resource_title, Hash $attrs| {", + " group { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " $users.each |String $resource_title, Hash $attrs| {", + " user { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " $dirs.each |String $resource_title, Hash $attrs| {", + " file { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " $files.each |String $resource_title, Hash $attrs| {", + " file { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " $links.each |String $resource_title, Hash $attrs| {", + " file { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " $services.each |String $resource_title, Hash $attrs| {", + " service { $resource_title:", + " * => $attrs,", + " }", + " }", + "", + " if $sysctl_apply and $files.has_key('/etc/sysctl.d/99-enroll.conf') {", + " exec { 'enroll-apply-sysctl':", + " command => $sysctl_ignore_apply_errors ? {", + " true => \"/bin/sh -c 'sysctl -e -p /etc/sysctl.d/99-enroll.conf || true'\",", + " default => 'sysctl -e -p /etc/sysctl.d/99-enroll.conf',", + " },", + " path => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],", + " refreshonly => true,", + " subscribe => File['/etc/sysctl.d/99-enroll.conf'],", + " }", + " }", + "", + " # Generated notes are supplied through the $notes parameter for review.", + "}", + "", + ] + return "\n".join(lines) + + def _render_site_pp(puppet_roles: List[PuppetRole], fqdn: Optional[str]) -> str: node_name = _pp_quote(fqdn) if fqdn else "default" if not puppet_roles: @@ -497,6 +688,91 @@ def _render_site_pp(puppet_roles: List[PuppetRole], fqdn: Optional[str]) -> str: return f"node {node_name} {{\n{includes}\n}}\n" +def _render_hiera_site_pp(node_names: List[str]) -> str: + lines: List[str] = [ + "# Generated by Enroll from harvest state.", + "# Per-node class lists and resources are read from Hiera data.", + "", + ] + for node_name in node_names: + lines.extend( + [ + f"node {_pp_quote(node_name)} {{", + " $enroll_classes = lookup('enroll::classes', Array[String], 'unique', [])", + " $enroll_classes.each |String $enroll_class| {", + " include $enroll_class", + " }", + "}", + "", + ] + ) + lines.extend( + [ + "node default {", + " $enroll_classes = lookup('enroll::classes', Array[String], 'unique', [])", + " $enroll_classes.each |String $enroll_class| {", + " include $enroll_class", + " }", + "}", + "", + ] + ) + return "\n".join(lines) + + +def _render_hiera_yaml() -> str: + data = { + "version": 5, + "defaults": {"datadir": "data", "data_hash": "yaml_data"}, + "hierarchy": [ + { + "name": "Enroll trusted certname node data", + "path": "nodes/%{trusted.certname}.yaml", + }, + { + "name": "Enroll networking FQDN node data", + "path": "nodes/%{facts.networking.fqdn}.yaml", + }, + {"name": "Enroll common data", "path": "common.yaml"}, + ], + } + return yaml.safe_dump(data, sort_keys=False, explicit_start=True) + + +def _write_yaml(path: Path, data: Dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + yaml.safe_dump(data, sort_keys=True, explicit_start=True), + encoding="utf-8", + ) + + +def _write_hiera_node_data( + out: Path, fqdn: str, puppet_roles: List[PuppetRole] +) -> Path: + node_data: Dict[str, Any] = { + "enroll::classes": [r.module_name for r in puppet_roles] + } + for prole in puppet_roles: + node_data.update(_role_hiera_values(prole)) + node_path = out / "data" / "nodes" / _node_data_filename(fqdn) + _write_yaml(node_path, node_data) + common_path = out / "data" / "common.yaml" + if not common_path.exists(): + _write_yaml(common_path, {"enroll::classes": []}) + return node_path + + +def _hiera_node_names(out: Path) -> List[str]: + nodes_dir = out / "data" / "nodes" + if not nodes_dir.is_dir(): + return [] + out_names: Set[str] = set() + for path in nodes_dir.glob("*.yaml"): + out_names.add(path.name[: -len(".yaml")]) + return sorted(out_names) + + def _write_metadata(module_dir: Path, module_name: str) -> None: (module_dir / "metadata.json").write_text( json.dumps( @@ -517,9 +793,16 @@ def _write_metadata(module_dir: Path, module_name: str) -> None: ) -def _render_readme(state: Dict[str, Any], puppet_roles: List[PuppetRole]) -> str: +def _render_readme( + state: Dict[str, Any], + puppet_roles: List[PuppetRole], + *, + fqdn: Optional[str] = None, + node_names: Optional[List[str]] = None, +) -> str: host = state.get("host", {}) if isinstance(state.get("host"), dict) else {} hostname = host.get("hostname") or "unknown" + hiera_mode = bool(fqdn) role_lines = ( "\n".join( f"- `{r.module_name}` from Enroll role `{r.role_name}`" @@ -527,11 +810,39 @@ def _render_readme(state: Dict[str, Any], puppet_roles: List[PuppetRole]) -> str ) or "- None." ) + node_lines = "\n".join(f"- `{n}`" for n in (node_names or [])) or "- None." notes: List[str] = [] for r in puppet_roles: for note in r.notes: notes.append(f"`{r.module_name}`: {note}") notes_text = "\n".join(f"- {n}" for n in notes) or "- None." + if hiera_mode: + layout = f"""- `manifests/site.pp` declares node blocks and includes classes listed in Hiera key `enroll::classes`. +- `hiera.yaml` configures per-node lookup from `data/nodes/%{{trusted.certname}}.yaml` with a fallback to `data/common.yaml`. +- `data/nodes/{_node_data_filename(fqdn or '')}` contains this node's class list and class parameter data. +- `modules//manifests/init.pp` contains reusable, data-driven classes. +- `modules//files/nodes//...` contains node-specific harvested file artifacts, avoiding clashes between hosts.""" + apply = f"""Run from this generated output directory, passing the node certname so Hiera selects the right node data: + +```bash +sudo puppet apply --modulepath ./modules --hiera_config ./hiera.yaml --certname {fqdn} manifests/site.pp --noop +``` + +For Puppet agent/control-repo use, place this output where `hiera.yaml`, `data/`, `manifests/`, and `modules/` form the environment root. Re-running Enroll with another `--fqdn` into the same output directory adds or replaces that node's YAML without deleting existing node data.""" + else: + layout = """- `manifests/site.pp` declares a `node` block and includes the generated classes in manifest order. +- `modules//manifests/init.pp` contains resources for each generated Enroll role/snapshot or common package group. +- `modules//files/` contains harvested file artifacts for that role or group. +- Generated module names avoid Puppet reserved words such as `default`.""" + apply = """Run from this generated output directory so Puppet can find `./modules`, or pass an absolute module path: + +```bash +sudo puppet apply --modulepath ./modules manifests/site.pp --noop +``` + +```bash +sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/manifests/site.pp --noop +```""" return f"""# Enroll Puppet manifest Generated by Enroll from harvest data for `{hostname}`. @@ -540,10 +851,11 @@ This Puppet target reuses the existing harvest state without changing harvesting ## Layout -- `manifests/site.pp` declares a `node` block and includes the generated classes in manifest order. -- `modules//manifests/init.pp` contains resources for each generated Enroll role/snapshot or common package group. -- `modules//files/` contains harvested file artifacts for that role or group. -- Generated module names avoid Puppet reserved words such as `default`. +{layout} + +## Known nodes + +{node_lines if hiera_mode else '- Non-Hiera single-node output.'} ## Generated modules @@ -551,15 +863,7 @@ This Puppet target reuses the existing harvest state without changing harvesting ## Apply / check -Run from this generated output directory so Puppet can find `./modules`, or pass an absolute module path: - -```bash -sudo puppet apply --modulepath ./modules manifests/site.pp --noop -``` - -```bash -sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/manifests/site.pp --noop -``` +{apply} ## Generated resources @@ -607,7 +911,8 @@ class PuppetManifestRenderer: state = PuppetRole.load_state(bundle_dir) out = Path(out_dir) - if out.exists(): + hiera_mode = bool(fqdn) + if out.exists() and not hiera_mode: shutil.rmtree(out) manifests_dir = out / "manifests" modules_dir = out / "modules" @@ -628,15 +933,35 @@ class PuppetManifestRenderer: module_manifests.mkdir(parents=True, exist_ok=True) module_files.mkdir(parents=True, exist_ok=True) (module_manifests / "init.pp").write_text( - _render_role_class(prole), encoding="utf-8" + ( + _render_hiera_role_class(prole) + if hiera_mode + else _render_role_class(prole) + ), + encoding="utf-8", ) _write_metadata(module_dir, prole.module_name) - (manifests_dir / "site.pp").write_text( - _render_site_pp(puppet_roles, fqdn), encoding="utf-8" - ) + node_names: List[str] = [] + if hiera_mode and fqdn: + (out / "hiera.yaml").write_text(_render_hiera_yaml(), encoding="utf-8") + _write_hiera_node_data(out, fqdn, puppet_roles) + node_names = _hiera_node_names(out) + (manifests_dir / "site.pp").write_text( + _render_hiera_site_pp(node_names), encoding="utf-8" + ) + else: + (manifests_dir / "site.pp").write_text( + _render_site_pp(puppet_roles, fqdn), encoding="utf-8" + ) (out / "README.md").write_text( - _render_readme(state, puppet_roles), encoding="utf-8" + _render_readme( + state, + puppet_roles, + fqdn=fqdn, + node_names=node_names, + ), + encoding="utf-8", ) diff --git a/enroll/system_paths.py b/enroll/system_paths.py new file mode 100644 index 0000000..759d7b5 --- /dev/null +++ b/enroll/system_paths.py @@ -0,0 +1,313 @@ +from __future__ import annotations + +import glob +import os +import re +from typing import Dict, List, Set, Tuple + + +ALLOWED_UNOWNED_EXTS = { + ".cfg", + ".cnf", + ".conf", + ".ini", + ".json", + ".link", + ".mount", + ".netdev", + ".network", + ".path", + ".rules", + ".service", + ".socket", + ".target", + ".timer", + ".toml", + ".yaml", + ".yml", + "", # allow extensionless (common in /etc/default and /etc/init.d) +} + +MAX_FILES_CAP = 4000 +MAX_UNOWNED_FILES_PER_ROLE = 500 + + +def is_confish(path: str) -> bool: + base = os.path.basename(path) + _, ext = os.path.splitext(base) + return ext in ALLOWED_UNOWNED_EXTS + + +def scan_unowned_under_roots( + roots: List[str], + owned_etc: Set[str], + limit: int = MAX_UNOWNED_FILES_PER_ROLE, + *, + confish_only: bool = True, +) -> List[str]: + found: List[str] = [] + for root in roots: + if not os.path.isdir(root): + continue + for dirpath, _, filenames in os.walk(root): + if len(found) >= limit: + return found + for fn in filenames: + if len(found) >= limit: + return found + p = os.path.join(dirpath, fn) + if not p.startswith("/etc/"): + continue + if p in owned_etc: + continue + if not os.path.isfile(p) or os.path.islink(p): + continue + if confish_only and not is_confish(p): + continue + found.append(p) + return found + + +def topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Set[str]: + topdirs: Set[str] = set() + for path in pkg_to_etc_paths.get(pkg, []): + parts = path.split("/", 3) + if len(parts) >= 3 and parts[1] == "etc" and parts[2]: + topdirs.add(parts[2]) + return topdirs + + +_APT_SOURCE_GLOBS = [ + "/etc/apt/sources.list", + "/etc/apt/sources.list.d/*.list", + "/etc/apt/sources.list.d/*.sources", +] + +_SYSTEM_CAPTURE_GLOBS: List[Tuple[str, str]] = [ + ("/etc/fstab", "system_mounts"), + ("/etc/crypttab", "system_mounts"), + ("/etc/sysctl.conf", "system_sysctl"), + ("/etc/sysctl.d/*", "system_sysctl"), + ("/etc/modprobe.d/*", "system_modprobe"), + ("/etc/modules", "system_modprobe"), + ("/etc/modules-load.d/*", "system_modprobe"), + ("/etc/netplan/*", "system_network"), + ("/etc/systemd/network/*", "system_network"), + ("/etc/network/interfaces", "system_network"), + ("/etc/network/interfaces.d/*", "system_network"), + ("/etc/resolvconf.conf", "system_network"), + ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + ("/etc/NetworkManager/system-connections/*", "system_network"), + ("/etc/sysconfig/network*", "system_network"), + ("/etc/sysconfig/network-scripts/*", "system_network"), + ("/etc/nftables.conf", "system_firewall"), + ("/etc/nftables.d/*", "system_firewall"), + ("/etc/iptables/rules.v4", "system_firewall"), + ("/etc/iptables/rules.v6", "system_firewall"), + ("/etc/sysconfig/iptables", "system_firewall"), + ("/etc/sysconfig/ip6tables", "system_firewall"), + ("/etc/ipset.conf", "system_firewall"), + ("/etc/ipset/*", "system_firewall"), + ("/etc/ipset.d/*", "system_firewall"), + ("/etc/sysconfig/ipset", "system_firewall"), + ("/etc/default/ipset", "system_firewall"), + ("/etc/ufw/*", "system_firewall"), + ("/etc/default/ufw", "system_firewall"), + ("/etc/firewalld/*", "system_firewall"), + ("/etc/firewalld/zones/*", "system_firewall"), + ("/etc/selinux/config", "system_security"), + ("/etc/rc.local", "system_rc"), +] + +_PERSISTENT_IPTABLES_V4_GLOBS = [ + "/etc/iptables/rules.v4", + "/etc/sysconfig/iptables", +] + +_PERSISTENT_IPTABLES_V6_GLOBS = [ + "/etc/iptables/rules.v6", + "/etc/sysconfig/ip6tables", +] + +_PERSISTENT_IPSET_GLOBS = [ + "/etc/ipset.conf", + "/etc/ipset/*", + "/etc/ipset.d/*", + "/etc/sysconfig/ipset", +] + + +def persistent_ipset_globs() -> List[str]: + return list(_PERSISTENT_IPSET_GLOBS) + + +def persistent_iptables_v4_globs() -> List[str]: + return list(_PERSISTENT_IPTABLES_V4_GLOBS) + + +def persistent_iptables_v6_globs() -> List[str]: + return list(_PERSISTENT_IPTABLES_V6_GLOBS) + + +def persistent_firewall_files(globs: List[str]) -> List[str]: + """Return persistent firewall files matching ``globs``.""" + + seen: Set[str] = set() + out: List[str] = [] + for spec in globs: + for path in iter_matching_files(spec): + if path in seen: + continue + seen.add(path) + out.append(path) + return sorted(out) + + +def iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: + """Expand a glob spec and also walk directories to collect files.""" + + out: List[str] = [] + for p in glob.glob(spec): + if len(out) >= cap: + break + if os.path.islink(p): + continue + if os.path.isfile(p): + out.append(p) + continue + if os.path.isdir(p): + for dirpath, _, filenames in os.walk(p): + for fn in filenames: + if len(out) >= cap: + break + fp = os.path.join(dirpath, fn) + if os.path.islink(fp) or not os.path.isfile(fp): + continue + out.append(fp) + if len(out) >= cap: + break + return out + + +def parse_apt_signed_by(source_files: List[str]) -> Set[str]: + """Return absolute keyring paths referenced via signed-by / Signed-By.""" + + out: Set[str] = set() + re_signed_by = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE) + re_signed_by_hdr = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE) + + for sf in source_files: + try: + with open(sf, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#"): + continue + + m = re_signed_by_hdr.match(line) + if m: + val = m.group(1).strip() + if val.startswith("|"): + continue + toks = re.split(r"[\s,]+", val) + for t in toks: + if t.startswith("/"): + out.add(t) + continue + + if "[" in line and "]" in line: + bracket = line.split("[", 1)[1].split("]", 1)[0] + for mm in re_signed_by.finditer(bracket): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + continue + + for mm in re_signed_by.finditer(line): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + except OSError: + continue + + return out + + +def iter_apt_capture_paths() -> List[Tuple[str, str]]: + """Return (path, reason) pairs for APT configuration.""" + + reasons: Dict[str, str] = {} + + if os.path.isdir("/etc/apt"): + for dirpath, _, filenames in os.walk("/etc/apt"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "apt_config") + + apt_sources: List[str] = [] + for g in _APT_SOURCE_GLOBS: + apt_sources.extend(iter_matching_files(g)) + for p in sorted(set(apt_sources)): + reasons[p] = "apt_source" + + for g in ( + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", + ): + for p in iter_matching_files(g): + reasons[p] = "apt_keyring" + + signed_by = parse_apt_signed_by(sorted(set(apt_sources))) + for p in sorted(signed_by): + if os.path.islink(p) or not os.path.isfile(p): + continue + if p.startswith("/etc/apt/"): + reasons[p] = "apt_keyring" + else: + reasons[p] = "apt_signed_by_keyring" + + return [(p, reasons[p]) for p in sorted(reasons.keys())] + + +def iter_dnf_capture_paths() -> List[Tuple[str, str]]: + """Return (path, reason) pairs for DNF/YUM configuration on RPM systems.""" + + reasons: Dict[str, str] = {} + + for root, tag in ( + ("/etc/dnf", "dnf_config"), + ("/etc/yum", "yum_config"), + ): + if os.path.isdir(root): + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, tag) + + for p in iter_matching_files("/etc/yum.conf"): + reasons[p] = "yum_conf" + for p in iter_matching_files("/etc/yum.repos.d/*.repo"): + reasons[p] = "yum_repo" + for p in iter_matching_files("/etc/pki/rpm-gpg/*"): + reasons[p] = "rpm_gpg_key" + + return [(p, reasons[p]) for p in sorted(reasons.keys())] + + +def iter_system_capture_paths() -> List[Tuple[str, str]]: + out: List[Tuple[str, str]] = [] + seen: Set[str] = set() + for spec, reason in _SYSTEM_CAPTURE_GLOBS: + for path in iter_matching_files(spec): + if path in seen: + continue + seen.add(path) + out.append((path, reason)) + return sorted(out, key=lambda x: x[0]) diff --git a/tests/test_harvest.py b/tests/test_harvest.py index c84b66c..a308bcf 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -5,21 +5,28 @@ import pytest from pathlib import Path import enroll.harvest as harvest +import enroll.system_paths as system_paths from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo from enroll.pathfilter import PathFilter -from enroll.harvest import ( - _is_confish, - _hint_names, - _topdirs_for_package, - _iter_matching_files, - _parse_apt_signed_by, - _capture_link, - _capture_file, - ManagedFile, - ManagedLink, - ExcludedFile, - IgnorePolicy, +import enroll.capture as capture +from enroll.capture import ( + capture_file as _capture_file, + capture_link as _capture_link, + capture_user_shell_dotfiles, + files_differ, +) +from enroll.harvest_types import ExcludedFile, ManagedFile, ManagedLink +from enroll.ignore import IgnorePolicy +from enroll.package_hints import ( + add_pkgs_from_etc_topdirs, + hint_names as _hint_names, +) +from enroll.system_paths import ( + is_confish as _is_confish, + iter_matching_files as _iter_matching_files, + parse_apt_signed_by as _parse_apt_signed_by, + topdirs_for_package as _topdirs_for_package, ) from unittest.mock import MagicMock @@ -249,6 +256,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( return ("root", "root", "0644") monkeypatch.setattr(harvest, "stat_triplet", fake_stat_triplet) + monkeypatch.setattr(capture, "stat_triplet", fake_stat_triplet) # Avoid needing source files on disk by implementing our own bundle copier def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): @@ -256,7 +264,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( dst.parent.mkdir(parents=True, exist_ok=True) dst.write_bytes(files.get(abs_path, b"")) - monkeypatch.setattr(harvest, "_copy_into_bundle", fake_copy) + monkeypatch.setattr(capture, "copy_into_bundle", fake_copy) state_path = harvest.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) @@ -327,8 +335,8 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( # Only include the cron snippet in the system capture set. monkeypatch.setattr( - harvest, - "_iter_system_capture_paths", + system_paths, + "iter_system_capture_paths", lambda: [("/etc/cron.d/ntpsec", "system_cron")], ) @@ -392,6 +400,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(harvest, "get_backend", lambda info=None: backend) monkeypatch.setattr(harvest, "stat_triplet", lambda p: ("root", "root", "0644")) + monkeypatch.setattr(capture, "stat_triplet", lambda p: ("root", "root", "0644")) monkeypatch.setattr(harvest, "collect_non_system_users", lambda: []) def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): @@ -399,7 +408,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( dst.parent.mkdir(parents=True, exist_ok=True) dst.write_bytes(files[abs_path]) - monkeypatch.setattr(harvest, "_copy_into_bundle", fake_copy) + monkeypatch.setattr(capture, "copy_into_bundle", fake_copy) state_path = harvest.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) @@ -421,7 +430,7 @@ def test_files_differ_binary(tmp_path: Path): file2 = tmp_path / "file2.bin" file1.write_bytes(b"\x00\x01\x02\x03") file2.write_bytes(b"\x00\x01\x02\x03") - assert harvest._files_differ(str(file1), str(file2)) is False + assert files_differ(str(file1), str(file2)) is False def test_files_differ_binary_different(tmp_path: Path): @@ -429,7 +438,7 @@ def test_files_differ_binary_different(tmp_path: Path): file2 = tmp_path / "file2.bin" file1.write_bytes(b"\x00\x01\x02\x03") file2.write_bytes(b"\x00\x01\x02\x04") - assert harvest._files_differ(str(file1), str(file2)) is True + assert files_differ(str(file1), str(file2)) is True def test_files_differ_non_regular_a(tmp_path: Path): @@ -437,14 +446,14 @@ def test_files_differ_non_regular_a(tmp_path: Path): directory.mkdir() file1 = tmp_path / "file1.txt" file1.write_text("content", encoding="utf-8") - assert harvest._files_differ(str(directory), str(file1)) is True + assert files_differ(str(directory), str(file1)) is True def test_topdirs_for_package_with_multiple_paths(): pkg_to_etc_paths = { "nginx": ["/etc/nginx/nginx.conf", "/etc/nginx/sites-enabled/default"], } - result = harvest._topdirs_for_package("nginx", pkg_to_etc_paths) + result = _topdirs_for_package("nginx", pkg_to_etc_paths) assert result == {"nginx"} @@ -452,12 +461,12 @@ def test_topdirs_for_package_with_multiple_topdirs(): pkg_to_etc_paths = { "multi": ["/etc/nginx/nginx.conf", "/etc/ssh/sshd_config"], } - result = harvest._topdirs_for_package("multi", pkg_to_etc_paths) + result = _topdirs_for_package("multi", pkg_to_etc_paths) assert result == {"nginx", "ssh"} def test_topdirs_for_package_empty(): - result = harvest._topdirs_for_package("empty", {}) + result = _topdirs_for_package("empty", {}) assert result == set() @@ -465,7 +474,7 @@ def test_topdirs_for_package_no_etc(): pkg_to_etc_paths = { "other": ["/usr/share/doc/file"], } - result = harvest._topdirs_for_package("other", pkg_to_etc_paths) + result = _topdirs_for_package("other", pkg_to_etc_paths) assert result == set() @@ -475,7 +484,7 @@ def test_files_differ_same_content(tmp_path: Path): file_b = tmp_path / "b.txt" file_a.write_text("same content", encoding="utf-8") file_b.write_text("same content", encoding="utf-8") - assert harvest._files_differ(str(file_a), str(file_b)) is False + assert files_differ(str(file_a), str(file_b)) is False def test_files_differ_different_content(tmp_path: Path): @@ -484,7 +493,7 @@ def test_files_differ_different_content(tmp_path: Path): file_b = tmp_path / "b.txt" file_a.write_text("content a", encoding="utf-8") file_b.write_text("content b", encoding="utf-8") - assert harvest._files_differ(str(file_a), str(file_b)) is True + assert files_differ(str(file_a), str(file_b)) is True def test_files_differ_missing_file(tmp_path: Path): @@ -492,7 +501,7 @@ def test_files_differ_missing_file(tmp_path: Path): file_a = tmp_path / "a.txt" file_a.write_text("content", encoding="utf-8") file_b = tmp_path / "b.txt" - assert harvest._files_differ(str(file_a), str(file_b)) is True + assert files_differ(str(file_a), str(file_b)) is True def test_files_differ_both_missing(tmp_path: Path): @@ -500,7 +509,7 @@ def test_files_differ_both_missing(tmp_path: Path): file_a = tmp_path / "a.txt" file_b = tmp_path / "b.txt" # Both missing - should return True (they differ in the sense that neither exists) - assert harvest._files_differ(str(file_a), str(file_b)) is True + assert files_differ(str(file_a), str(file_b)) is True def test_files_differ_non_regular_b(tmp_path: Path): @@ -510,7 +519,7 @@ def test_files_differ_non_regular_b(tmp_path: Path): link_b = tmp_path / "link" link_b.symlink_to(file_a) # Symlinks are followed, so content is the same - assert harvest._files_differ(str(file_a), str(link_b)) is False + assert files_differ(str(file_a), str(link_b)) is False def test_files_differ_oserror_on_read(tmp_path: Path, monkeypatch): @@ -524,7 +533,7 @@ def test_files_differ_oserror_on_read(tmp_path: Path, monkeypatch): raise OSError("Permission denied") monkeypatch.setattr("builtins.open", fake_open, raising=False) - assert harvest._files_differ(str(file_a), str(file_b)) is True + assert files_differ(str(file_a), str(file_b)) is True def test_files_differ_large_file_returns_true(tmp_path: Path): @@ -536,7 +545,7 @@ def test_files_differ_large_file_returns_true(tmp_path: Path): file_a.write_bytes(data) file_b.write_bytes(data) # Should return True because files are too large - assert harvest._files_differ(str(file_a), str(file_b), max_bytes=1_000_000) is True + assert files_differ(str(file_a), str(file_b), max_bytes=1_000_000) is True def test_files_differ_size_mismatch(tmp_path: Path): @@ -545,7 +554,7 @@ def test_files_differ_size_mismatch(tmp_path: Path): file_b = tmp_path / "b.txt" file_a.write_text("short", encoding="utf-8") file_b.write_text("much longer content here", encoding="utf-8") - assert harvest._files_differ(str(file_a), str(file_b)) is True + assert files_differ(str(file_a), str(file_b)) is True def test_files_differ_large_files(tmp_path: Path): @@ -556,12 +565,12 @@ def test_files_differ_large_files(tmp_path: Path): data = b"x" * 10000 file_a.write_bytes(data) file_b.write_bytes(data) - assert harvest._files_differ(str(file_a), str(file_b)) is False + assert files_differ(str(file_a), str(file_b)) is False def test_hint_names_with_unit_and_packages(): """Test _hint_names extracts hints from unit and packages.""" - result = harvest._hint_names("nginx.service", {"nginx-common", "nginx-core"}) + result = _hint_names("nginx.service", {"nginx-common", "nginx-core"}) assert "nginx" in result assert "nginx-common" in result assert "nginx-core" in result @@ -569,20 +578,20 @@ def test_hint_names_with_unit_and_packages(): def test_hint_names_with_template_unit(): """Test _hint_names handles template units.""" - result = harvest._hint_names("getty@tty1.service", set()) + result = _hint_names("getty@tty1.service", set()) assert "getty" in result assert "getty@tty1" in result def test_hint_names_with_dotted_unit(): """Test _hint_names handles dotted unit names.""" - result = harvest._hint_names("nginx.service", set()) + result = _hint_names("nginx.service", set()) assert "nginx" in result def test_hint_names_empty(): """Test _hint_names with empty inputs.""" - result = harvest._hint_names("", set()) + result = _hint_names("", set()) assert result == set() @@ -594,7 +603,7 @@ def test_add_pkgs_from_etc_topdirs(): "ssh": {"openssh-server"}, } pkgs = set() - harvest._add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) + add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) # Should add packages from matching topdirs assert "nginx-common" in pkgs or "nginx-core" in pkgs @@ -604,7 +613,7 @@ def test_add_pkgs_from_etc_topdirs_empty(): hints = set() topdir_to_pkgs = {} pkgs = set() - harvest._add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) + add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) assert pkgs == set() @@ -612,47 +621,47 @@ def test_is_confish_with_conf(tmp_path: Path): """Test _is_confish recognizes .conf files.""" file1 = tmp_path / "test.conf" file1.write_text("[Unit]", encoding="utf-8") - assert harvest._is_confish(str(file1)) is True + assert _is_confish(str(file1)) is True def test_is_confish_with_yaml(tmp_path: Path): """Test _is_confish recognizes .yaml files.""" file1 = tmp_path / "test.yaml" file1.write_text("key: value", encoding="utf-8") - assert harvest._is_confish(str(file1)) is True + assert _is_confish(str(file1)) is True def test_is_confish_with_json(tmp_path: Path): """Test _is_confish recognizes .json files.""" file1 = tmp_path / "test.json" file1.write_text('{"key": "value"}', encoding="utf-8") - assert harvest._is_confish(str(file1)) is True + assert _is_confish(str(file1)) is True def test_is_confish_with_service(tmp_path: Path): """Test _is_confish recognizes .service files.""" file1 = tmp_path / "test.service" file1.write_text("[Unit]", encoding="utf-8") - assert harvest._is_confish(str(file1)) is True + assert _is_confish(str(file1)) is True def test_is_confish_with_extensionless(tmp_path: Path): """Test _is_confish recognizes extensionless config files.""" file1 = tmp_path / "default" file1.write_text("OPTIONS=", encoding="utf-8") - assert harvest._is_confish(str(file1)) is True + assert _is_confish(str(file1)) is True def test_is_confish_not_config(tmp_path: Path): """Test _is_confish rejects non-config files.""" file1 = tmp_path / "test.log" file1.write_text("log", encoding="utf-8") - assert harvest._is_confish(str(file1)) is False + assert _is_confish(str(file1)) is False def test_is_confish_nonexistent(): """Test _is_confish returns False for nonexistent files.""" - assert harvest._is_confish("/nonexistent/file.xyz") is False + assert _is_confish("/nonexistent/file.xyz") is False """Additional coverage tests for harvest.py""" @@ -1065,7 +1074,7 @@ def test_user_shell_dotfiles_are_not_auto_captured_without_dangerous(tmp_path: P managed: list[ManagedFile] = [] excluded: list[ExcludedFile] = [] - captured = harvest._capture_user_shell_dotfiles( + captured = capture_user_shell_dotfiles( bundle_dir=str(tmp_path / "bundle"), role_name="users", home=str(home), @@ -1106,7 +1115,7 @@ def test_user_shell_dotfiles_dangerous_captures_changed_files_only(tmp_path: Pat managed: list[ManagedFile] = [] excluded: list[ExcludedFile] = [] - captured = harvest._capture_user_shell_dotfiles( + captured = capture_user_shell_dotfiles( bundle_dir=str(tmp_path / "bundle"), role_name="users", home=str(home), diff --git a/tests/test_harvest_collectors.py b/tests/test_harvest_collectors.py index f72a0f9..8d59a79 100644 --- a/tests/test_harvest_collectors.py +++ b/tests/test_harvest_collectors.py @@ -1,13 +1,10 @@ from __future__ import annotations -from enroll.harvest import ( - FirewallRuntimeSnapshot, - HarvestContext, - IgnorePolicy, - PathFilter, - RuntimeStateCollector, - SysctlSnapshot, -) +from enroll.harvest_collectors.context import HarvestContext +from enroll.harvest_collectors.runtime import RuntimeStateCollector +from enroll.harvest_types import FirewallRuntimeSnapshot, SysctlSnapshot +from enroll.ignore import IgnorePolicy +from enroll.pathfilter import PathFilter class _Backend: diff --git a/tests/test_harvest_cron_logrotate.py b/tests/test_harvest_cron_logrotate.py index d20d371..8e614b3 100644 --- a/tests/test_harvest_cron_logrotate.py +++ b/tests/test_harvest_cron_logrotate.py @@ -4,6 +4,8 @@ import json from pathlib import Path import enroll.harvest as h +import enroll.capture as capture +import enroll.harvest_collectors.cron_logrotate as cron_logrotate from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo @@ -89,7 +91,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles( } return list(mapping.get(spec, []))[:cap] - monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching) + monkeypatch.setattr(cron_logrotate, "iter_matching_files", fake_iter_matching) # Avoid real system probing. monkeypatch.setattr( @@ -128,7 +130,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles( ) monkeypatch.setattr(h, "collect_non_system_users", lambda: []) monkeypatch.setattr( - h, + capture, "stat_triplet", lambda p: ("alice" if "alice" in p else "root", "root", "0644"), ) @@ -139,7 +141,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles( dst.parent.mkdir(parents=True, exist_ok=True) dst.write_bytes(files.get(abs_path, b"")) - monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) + monkeypatch.setattr(capture, "copy_into_bundle", fake_copy) state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) diff --git a/tests/test_harvest_helpers.py b/tests/test_harvest_helpers.py index 5131809..07ae690 100644 --- a/tests/test_harvest_helpers.py +++ b/tests/test_harvest_helpers.py @@ -4,6 +4,8 @@ import os from pathlib import Path import enroll.harvest as h +import enroll.system_paths as sp +from enroll.package_hints import role_name_from_pkg, role_name_from_unit def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path: Path): @@ -24,12 +26,12 @@ def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path str(root / "link"): "link", } - monkeypatch.setattr(h.glob, "glob", lambda spec: [str(root), str(root / "link")]) - monkeypatch.setattr(h.os.path, "islink", lambda p: paths.get(p) == "link") - monkeypatch.setattr(h.os.path, "isfile", lambda p: paths.get(p) == "file") - monkeypatch.setattr(h.os.path, "isdir", lambda p: paths.get(p) == "dir") + monkeypatch.setattr(sp.glob, "glob", lambda spec: [str(root), str(root / "link")]) + monkeypatch.setattr(sp.os.path, "islink", lambda p: paths.get(p) == "link") + monkeypatch.setattr(sp.os.path, "isfile", lambda p: paths.get(p) == "file") + monkeypatch.setattr(sp.os.path, "isdir", lambda p: paths.get(p) == "dir") monkeypatch.setattr( - h.os, + sp.os, "walk", lambda p: [ (str(root), ["sub"], ["real.txt", "link"]), @@ -37,7 +39,7 @@ def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path ], ) - out = h._iter_matching_files("/whatever/*", cap=100) + out = sp.iter_matching_files("/whatever/*", cap=100) assert str(root / "real.txt") in out assert str(root / "sub" / "nested.txt") in out assert str(root / "link") not in out @@ -57,7 +59,7 @@ def test_parse_apt_signed_by_extracts_keyrings(tmp_path: Path): f3 = tmp_path / "c.sources" f3.write_text("Signed-By: | /bin/echo nope\n", encoding="utf-8") - out = h._parse_apt_signed_by([str(f1), str(f2), str(f3)]) + out = sp.parse_apt_signed_by([str(f1), str(f2), str(f3)]) assert "/usr/share/keyrings/foo.gpg" in out assert "/etc/apt/keyrings/bar.gpg" in out assert "/usr/share/keyrings/baz.gpg" in out @@ -74,9 +76,9 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch): "/usr/share/keyrings/ext.gpg": "file", } - monkeypatch.setattr(h.os.path, "isdir", lambda p: p in {"/etc/apt"}) + monkeypatch.setattr(sp.os.path, "isdir", lambda p: p in {"/etc/apt"}) monkeypatch.setattr( - h.os, + sp.os, "walk", lambda root: [ ("/etc/apt", ["apt.conf.d", "sources.list.d"], []), @@ -84,8 +86,8 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch): ("/etc/apt/sources.list.d", [], ["test.list"]), ], ) - monkeypatch.setattr(h.os.path, "islink", lambda p: False) - monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file") + monkeypatch.setattr(sp.os.path, "islink", lambda p: False) + monkeypatch.setattr(sp.os.path, "isfile", lambda p: files.get(p) == "file") # Only treat the sources glob as having a hit. def fake_iter_matching(spec: str, cap: int = 10000): @@ -93,7 +95,7 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch): return ["/etc/apt/sources.list.d/test.list"] return [] - monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching) + monkeypatch.setattr(sp, "iter_matching_files", fake_iter_matching) # Provide file contents for the sources file. real_open = open @@ -105,10 +107,10 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch): # Easier: patch _parse_apt_signed_by directly to avoid filesystem reads. monkeypatch.setattr( - h, "_parse_apt_signed_by", lambda sfs: {"/usr/share/keyrings/ext.gpg"} + sp, "parse_apt_signed_by", lambda sfs: {"/usr/share/keyrings/ext.gpg"} ) - out = h._iter_apt_capture_paths() + out = sp.iter_apt_capture_paths() paths = {p for p, _r in out} reasons = {p: r for p, r in out} assert "/etc/apt/apt.conf.d/00test" in paths @@ -138,19 +140,23 @@ def test_iter_dnf_capture_paths(monkeypatch): return [("/etc/pki/rpm-gpg", [], ["RPM-GPG-KEY"])] return [] - monkeypatch.setattr(h.os.path, "isdir", isdir) - monkeypatch.setattr(h.os, "walk", walk) - monkeypatch.setattr(h.os.path, "islink", lambda p: False) - monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file") - monkeypatch.setattr( - h, - "_iter_matching_files", - lambda spec, cap=10000: ( - ["/etc/yum.repos.d/test.repo"] if spec.endswith("*.repo") else [] - ), - ) + monkeypatch.setattr(sp.os.path, "isdir", isdir) + monkeypatch.setattr(sp.os, "walk", walk) + monkeypatch.setattr(sp.os.path, "islink", lambda p: False) + monkeypatch.setattr(sp.os.path, "isfile", lambda p: files.get(p) == "file") - out = h._iter_dnf_capture_paths() + def fake_iter_matching(spec: str, cap: int = 10000): + if spec == "/etc/yum.conf": + return ["/etc/yum.conf"] + if spec.endswith("*.repo"): + return ["/etc/yum.repos.d/test.repo"] + if spec == "/etc/pki/rpm-gpg/*": + return ["/etc/pki/rpm-gpg/RPM-GPG-KEY"] + return [] + + monkeypatch.setattr(sp, "iter_matching_files", fake_iter_matching) + + out = sp.iter_dnf_capture_paths() paths = {p for p, _r in out} assert "/etc/dnf/dnf.conf" in paths assert "/etc/yum/yum.conf" in paths @@ -160,13 +166,13 @@ def test_iter_dnf_capture_paths(monkeypatch): def test_iter_system_capture_paths_dedupes_first_reason(monkeypatch): - monkeypatch.setattr(h, "_SYSTEM_CAPTURE_GLOBS", [("/a", "r1"), ("/b", "r2")]) + monkeypatch.setattr(sp, "_SYSTEM_CAPTURE_GLOBS", [("/a", "r1"), ("/b", "r2")]) monkeypatch.setattr( - h, - "_iter_matching_files", + sp, + "iter_matching_files", lambda spec, cap=10000: ["/dup"] if spec in {"/a", "/b"} else [], ) - out = h._iter_system_capture_paths() + out = sp.iter_system_capture_paths() assert out == [("/dup", "r1")] @@ -289,20 +295,16 @@ def test_collect_firewall_runtime_snapshot_is_per_family_fallback( def test_package_role_names_do_not_collide_with_singleton_roles(): - from enroll.harvest import _role_name_from_pkg - - assert _role_name_from_pkg("flatpak") == "package_flatpak" - assert _role_name_from_pkg("snap") == "package_snap" - assert _role_name_from_pkg("users") == "package_users" - assert _role_name_from_pkg("nginx") == "nginx" + assert role_name_from_pkg("flatpak") == "package_flatpak" + assert role_name_from_pkg("snap") == "package_snap" + assert role_name_from_pkg("users") == "package_users" + assert role_name_from_pkg("nginx") == "nginx" def test_service_role_names_do_not_collide_with_singleton_roles(): - from enroll.harvest import _role_name_from_unit - - assert _role_name_from_unit("flatpak.service") == "service_flatpak" - assert _role_name_from_unit("users.service") == "service_users" - assert _role_name_from_unit("nginx.service") == "nginx" + assert role_name_from_unit("flatpak.service") == "service_flatpak" + assert role_name_from_unit("users.service") == "service_users" + assert role_name_from_unit("nginx.service") == "nginx" def test_parse_sysctl_a_output_keeps_persistable_values(monkeypatch): diff --git a/tests/test_harvest_symlinks.py b/tests/test_harvest_symlinks.py index b327542..c177cda 100644 --- a/tests/test_harvest_symlinks.py +++ b/tests/test_harvest_symlinks.py @@ -2,6 +2,8 @@ import json from pathlib import Path import enroll.harvest as h +import enroll.harvest_collectors.services as services +import enroll.capture as capture from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo @@ -78,7 +80,7 @@ def _base_monkeypatches(monkeypatch, *, unit: str): # Avoid walking the real filesystem. monkeypatch.setattr(h.os, "walk", lambda root: iter(())) - monkeypatch.setattr(h, "_copy_into_bundle", lambda *a, **k: None) + monkeypatch.setattr(capture, "copy_into_bundle", lambda *a, **k: None) # Default to a "no files exist" view of the world unless a test overrides. monkeypatch.setattr(h.os.path, "isfile", lambda p: False) @@ -119,7 +121,7 @@ def test_harvest_captures_nginx_enabled_symlinks(monkeypatch, tmp_path: Path): return ["/etc/nginx/modules-enabled/mod-http"] return [] - monkeypatch.setattr(h.glob, "glob", fake_glob) + monkeypatch.setattr(services.glob, "glob", fake_glob) state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) @@ -158,7 +160,7 @@ def test_harvest_does_not_capture_enabled_symlinks_without_role( }, ) monkeypatch.setattr( - h.glob, "glob", lambda pat: ["/etc/nginx/sites-enabled/default"] + services.glob, "glob", lambda pat: ["/etc/nginx/sites-enabled/default"] ) monkeypatch.setattr(h.os.path, "islink", lambda p: True) monkeypatch.setattr(h.os, "readlink", lambda p: "../sites-available/default") @@ -186,7 +188,7 @@ def test_harvest_symlink_capture_respects_ignore_policy(monkeypatch, tmp_path: P monkeypatch.setattr(h.os.path, "islink", lambda p: p in links) monkeypatch.setattr(h.os, "readlink", lambda p: links[p]) monkeypatch.setattr( - h.glob, + services.glob, "glob", lambda pat: ( sorted(list(links.keys())) if pat == "/etc/nginx/sites-enabled/*" else [] @@ -251,7 +253,7 @@ def test_harvest_captures_apache2_enabled_symlinks(monkeypatch, tmp_path: Path): return ["/etc/apache2/conf-enabled/security.conf"] return [] - monkeypatch.setattr(h.glob, "glob", fake_glob) + monkeypatch.setattr(services.glob, "glob", fake_glob) state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) diff --git a/tests/test_manifest_puppet.py b/tests/test_manifest_puppet.py index 699b2b3..51f4de9 100644 --- a/tests/test_manifest_puppet.py +++ b/tests/test_manifest_puppet.py @@ -3,6 +3,8 @@ from __future__ import annotations import json from pathlib import Path +import yaml + from enroll import manifest @@ -160,36 +162,55 @@ def test_manifest_puppet_writes_control_repo_style_output(tmp_path: Path): manifest.manifest(str(bundle), str(out), target="puppet", fqdn="test.example") site_pp = (out / "manifests" / "site.pp").read_text(encoding="utf-8") - assert site_pp == ( - "node 'test.example' {\n" - " include curl\n" - " include foo\n" - " include users\n" - " include sysctl\n" - "}\n" + assert "node 'test.example' {" in site_pp + assert "lookup('enroll::classes'" in site_pp + assert "$enroll_classes.each" in site_pp + assert "include $enroll_class" in site_pp + assert "node default {" in site_pp + + assert (out / "hiera.yaml").exists() + node_data = yaml.safe_load( + (out / "data" / "nodes" / "test.example.yaml").read_text(encoding="utf-8") + ) + assert node_data["enroll::classes"] == ["curl", "foo", "users", "sysctl"] + assert node_data["curl::packages"] == ["curl"] + assert node_data["foo::packages"] == ["foo"] + assert node_data["foo::files"]["/etc/foo/foo.conf"]["source"] == ( + "puppet:///modules/foo/nodes/test.example/etc/foo.conf" + ) + assert node_data["foo::services"]["foo.service"] == { + "ensure": "running", + "enable": True, + } + assert node_data["users::users"]["alice"]["comment"] == "Alice Example" + assert node_data["users::users"]["alice"]["groups"] == ["docker"] + assert node_data["sysctl::files"]["/etc/sysctl.d/99-enroll.conf"]["source"] == ( + "puppet:///modules/sysctl/nodes/test.example/sysctl/99-enroll.conf" ) curl_pp = (out / "modules" / "curl" / "manifests" / "init.pp").read_text( encoding="utf-8" ) assert "class curl" in curl_pp - assert "package { 'curl':" in curl_pp + assert "Array[String] $packages = []" in curl_pp + assert "package { $package_name:" in curl_pp + assert "package { 'curl':" not in curl_pp foo_pp = (out / "modules" / "foo" / "manifests" / "init.pp").read_text( encoding="utf-8" ) assert "class foo" in foo_pp - assert "package { 'foo':" in foo_pp - assert "file { '/etc/foo/foo.conf':" in foo_pp - assert "source => 'puppet:///modules/foo/etc/foo.conf'" in foo_pp - assert "service { 'foo.service':" in foo_pp + assert "Hash[String, Hash] $files = {}" in foo_pp + assert "* => $attrs" in foo_pp + assert "package { 'foo':" not in foo_pp + assert "file { '/etc/foo/foo.conf':" not in foo_pp users_pp = (out / "modules" / "users" / "manifests" / "init.pp").read_text( encoding="utf-8" ) assert "class users" in users_pp - assert "group { 'docker':" in users_pp - assert "user { 'alice':" in users_pp + assert "Hash[String, Hash] $users = {}" in users_pp + assert "user { 'alice':" not in users_pp sysctl_pp = (out / "modules" / "sysctl" / "manifests" / "init.pp").read_text( encoding="utf-8" @@ -198,11 +219,162 @@ def test_manifest_puppet_writes_control_repo_style_output(tmp_path: Path): assert "Boolean $sysctl_apply = true" in sysctl_pp assert "Boolean $sysctl_ignore_apply_errors = true" in sysctl_pp assert "exec { 'enroll-apply-sysctl':" in sysctl_pp - assert "command => $sysctl_ignore_apply_errors ? {" in sysctl_pp - assert "sysctl -e -p /etc/sysctl.d/99-enroll.conf || true" in sysctl_pp + assert "$files.has_key('/etc/sysctl.d/99-enroll.conf')" in sysctl_pp - assert (out / "modules" / "foo" / "files" / "etc" / "foo.conf").exists() - assert (out / "modules" / "sysctl" / "files" / "sysctl" / "99-enroll.conf").exists() + assert ( + out + / "modules" + / "foo" + / "files" + / "nodes" + / "test.example" + / "etc" + / "foo.conf" + ).exists() + assert ( + out + / "modules" + / "sysctl" + / "files" + / "nodes" + / "test.example" + / "sysctl" + / "99-enroll.conf" + ).exists() + + +def test_manifest_puppet_fqdn_mode_can_accumulate_separate_node_data( + tmp_path: Path, +): + out = tmp_path / "puppet" + + def write_bundle(name: str, content: str) -> Path: + bundle = tmp_path / name + artifact = bundle / "artifacts" / "foo" / "etc" / "foo.conf" + artifact.parent.mkdir(parents=True, exist_ok=True) + artifact.write_text(content, encoding="utf-8") + _write_state( + bundle, + { + "schema_version": 3, + "host": {"hostname": name, "os": "debian", "pkg_backend": "dpkg"}, + "inventory": {"packages": {}}, + "roles": { + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "active", + "unit_file_state": "enabled", + "managed_dirs": [], + "managed_files": [ + { + "path": "/etc/foo/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + } + ], + "managed_links": [], + } + ], + "packages": [], + "users": { + "role_name": "users", + "users": [], + "managed_dirs": [], + "managed_files": [], + }, + "apt_config": { + "role_name": "apt_config", + "managed_dirs": [], + "managed_files": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_dirs": [], + "managed_files": [], + }, + "sysctl": { + "role_name": "sysctl", + "managed_dirs": [], + "managed_files": [], + }, + "firewall_runtime": { + "role_name": "firewall_runtime", + "packages": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_dirs": [], + "managed_files": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_dirs": [], + "managed_files": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "managed_dirs": [], + "managed_files": [], + "managed_links": [], + }, + }, + }, + ) + return bundle + + first = write_bundle("first", "first = true\n") + second = write_bundle("second", "second = true\n") + + manifest.manifest(str(first), str(out), target="puppet", fqdn="first.example") + manifest.manifest(str(second), str(out), target="puppet", fqdn="second.example") + + assert (out / "data" / "nodes" / "first.example.yaml").exists() + assert (out / "data" / "nodes" / "second.example.yaml").exists() + + site_pp = (out / "manifests" / "site.pp").read_text(encoding="utf-8") + assert "node 'first.example' {" in site_pp + assert "node 'second.example' {" in site_pp + + first_artifact = ( + out + / "modules" + / "foo" + / "files" + / "nodes" + / "first.example" + / "etc" + / "foo.conf" + ) + second_artifact = ( + out + / "modules" + / "foo" + / "files" + / "nodes" + / "second.example" + / "etc" + / "foo.conf" + ) + assert first_artifact.read_text(encoding="utf-8") == "first = true\n" + assert second_artifact.read_text(encoding="utf-8") == "second = true\n" + + first_data = yaml.safe_load( + (out / "data" / "nodes" / "first.example.yaml").read_text(encoding="utf-8") + ) + second_data = yaml.safe_load( + (out / "data" / "nodes" / "second.example.yaml").read_text(encoding="utf-8") + ) + assert first_data["foo::files"]["/etc/foo/foo.conf"]["source"] == ( + "puppet:///modules/foo/nodes/first.example/etc/foo.conf" + ) + assert second_data["foo::files"]["/etc/foo/foo.conf"]["source"] == ( + "puppet:///modules/foo/nodes/second.example/etc/foo.conf" + ) def test_manifest_puppet_uses_default_node_and_common_package_modules(tmp_path: Path):