More refactoring, support hiera and multi site mode for Puppet
All checks were successful
CI / test (push) Successful in 15m30s
Lint / test (push) Successful in 44s

This commit is contained in:
Miguel Jacq 2026-06-17 10:54:46 +10:00
parent ed9ec6893a
commit 20cc48e1ce
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
18 changed files with 1647 additions and 1189 deletions

275
enroll/capture.py Normal file
View file

@ -0,0 +1,275 @@
from __future__ import annotations
import os
import shutil
import stat
from typing import List, Optional, Set
from .fsutil import stat_triplet
from .harvest_types import ExcludedFile, ManagedFile, ManagedLink
from .ignore import IgnorePolicy
from .pathfilter import PathFilter
def files_differ(a: str, b: str, *, max_bytes: int = 2_000_000) -> bool:
    """Report whether ``a`` and ``b`` cannot be shown to have equal contents.

    The check is pessimistic: ``True`` is returned whenever equality cannot be
    established, so callers lean towards keeping user state.

    Args:
        a: Path to the first file.
        b: Path to the second (baseline) file.
        max_bytes: Files larger than this are reported as different without
            being read.

    Returns:
        ``False`` only when both paths resolve to regular files of the same
        size (no larger than ``max_bytes``) whose bytes are identical;
        ``True`` in every other case, including any ``OSError``.
    """
    chunk_size = 1024 * 64

    # Both paths must resolve (following symlinks) to regular files.
    infos = []
    for candidate in (a, b):
        try:
            info = os.stat(candidate, follow_symlinks=True)
        except OSError:
            return True
        if not stat.S_ISREG(info.st_mode):
            return True
        infos.append(info)

    size_a = infos[0].st_size
    if size_a != infos[1].st_size or size_a > max_bytes:
        return True

    try:
        with open(a, "rb") as left, open(b, "rb") as right:
            while True:
                left_chunk = left.read(chunk_size)
                right_chunk = right.read(chunk_size)
                if left_chunk != right_chunk:
                    return True
                if not left_chunk:
                    # Both streams hit EOF together without a mismatch.
                    return False
    except OSError:
        return True
def copy_into_bundle(
    bundle_dir: str, role_name: str, abs_path: str, src_rel: str
) -> None:
    """Copy ``abs_path`` to ``<bundle_dir>/artifacts/<role_name>/<src_rel>``.

    Missing parent directories of the destination are created first. The copy
    uses ``shutil.copy2``; any ``OSError`` it raises propagates to the caller.
    """
    artifact_path = os.path.join(bundle_dir, "artifacts", role_name, src_rel)
    parent_dir = os.path.dirname(artifact_path)
    os.makedirs(parent_dir, exist_ok=True)
    shutil.copy2(abs_path, artifact_path)
def capture_file(
    *,
    bundle_dir: str,
    role_name: str,
    abs_path: str,
    reason: str,
    policy: IgnorePolicy,
    path_filter: PathFilter,
    managed_out: List[ManagedFile],
    excluded_out: List[ExcludedFile],
    seen_role: Optional[Set[str]] = None,
    seen_global: Optional[Set[str]] = None,
    metadata: Optional[tuple[str, str, str]] = None,
) -> bool:
    """Attempt to copy one file into the bundle and record the outcome.

    A path already present in ``seen_global`` or ``seen_role`` is skipped
    silently. Otherwise the path is checked against ``path_filter`` and then
    ``policy``; a rejection appends an ``ExcludedFile`` to ``excluded_out``.
    When the file is accepted it is copied under the role's artifact
    directory and a ``ManagedFile`` is appended to ``managed_out``. In every
    non-skipped case the path is added to whichever seen-sets were supplied.

    Args:
        metadata: Optional ``(owner, group, mode)`` triple; when omitted the
            values come from ``stat_triplet(abs_path)``.

    Returns:
        ``True`` only if the file was copied and appended to ``managed_out``.
    """
    # Global de-duplication is consulted before the per-role set.
    for seen in (seen_global, seen_role):
        if seen is not None and abs_path in seen:
            return False

    def _remember() -> None:
        for seen in (seen_role, seen_global):
            if seen is not None:
                seen.add(abs_path)

    def _reject(why: str) -> bool:
        excluded_out.append(ExcludedFile(path=abs_path, reason=why))
        _remember()
        return False

    if path_filter.is_excluded(abs_path):
        return _reject("user_excluded")

    denied = policy.deny_reason(abs_path)
    if denied:
        return _reject(denied)

    try:
        if metadata is not None:
            owner, group, mode = metadata
        else:
            owner, group, mode = stat_triplet(abs_path)
    except OSError:
        return _reject("unreadable")

    # The artifact path mirrors the absolute path without its leading slashes.
    src_rel = abs_path.lstrip("/")
    try:
        copy_into_bundle(bundle_dir, role_name, abs_path, src_rel)
    except OSError:
        return _reject("unreadable")

    record = ManagedFile(
        path=abs_path,
        src_rel=src_rel,
        owner=owner,
        group=group,
        mode=mode,
        reason=reason,
    )
    managed_out.append(record)
    _remember()
    return True
# Dotfiles that capture_user_shell_dotfiles() compares against the same-named
# file under the skeleton directory; they are captured only when files_differ()
# reports a difference. Each entry is (name relative to the home directory,
# reason tag recorded on the captured file).
USER_SHELL_DOTFILES_WITH_SKEL_BASELINE = [
    (".bashrc", "user_shell_rc"),
    (".profile", "user_profile"),
    (".bash_logout", "user_shell_logout"),
]
# Dotfiles that capture_user_shell_dotfiles() considers whenever they exist as
# regular, non-symlink files; no skeleton comparison is made for these.
USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE = [
    (".bash_aliases", "user_shell_aliases"),
]
def capture_user_shell_dotfiles(
    *,
    bundle_dir: str,
    role_name: str,
    home: str,
    skel_dir: str,
    enabled: bool,
    policy: IgnorePolicy,
    path_filter: PathFilter,
    managed_out: List[ManagedFile],
    excluded_out: List[ExcludedFile],
    seen_role: Optional[Set[str]],
    seen_global: Optional[Set[str]],
) -> int:
    """Capture a fixed set of shell dotfiles from one home directory.

    Nothing happens unless ``enabled`` is true and ``home`` is a non-root
    absolute path. Dotfiles with a skeleton baseline are captured only when
    they differ from the same-named file under ``skel_dir``; the remaining
    dotfiles are captured whenever they exist. Symlinks and non-regular files
    are ignored.

    Returns:
        The number of files for which ``capture_file`` returned true.
    """
    if not enabled:
        return 0

    home_dir = (home or "").rstrip("/")
    # An empty string also fails the absolute-path test.
    if not home_dir.startswith("/"):
        return 0

    compare_limit = int(getattr(policy, "max_file_bytes", 256_000))

    def _is_plain_file(candidate: str) -> bool:
        return os.path.isfile(candidate) and not os.path.islink(candidate)

    def _capture(candidate: str, why: str) -> bool:
        return capture_file(
            bundle_dir=bundle_dir,
            role_name=role_name,
            abs_path=candidate,
            reason=why,
            policy=policy,
            path_filter=path_filter,
            managed_out=managed_out,
            excluded_out=excluded_out,
            seen_role=seen_role,
            seen_global=seen_global,
        )

    total = 0

    for name, why in USER_SHELL_DOTFILES_WITH_SKEL_BASELINE:
        candidate = os.path.join(home_dir, name)
        if not _is_plain_file(candidate):
            continue
        baseline = os.path.join(skel_dir, name)
        if not files_differ(candidate, baseline, max_bytes=compare_limit):
            continue
        if _capture(candidate, why):
            total += 1

    for name, why in USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE:
        candidate = os.path.join(home_dir, name)
        if _is_plain_file(candidate) and _capture(candidate, why):
            total += 1

    return total
def capture_link(
    *,
    role_name: str,
    abs_path: str,
    reason: str,
    policy: IgnorePolicy,
    path_filter: PathFilter,
    managed_out: List[ManagedLink],
    excluded_out: List[ExcludedFile],
    seen_role: Optional[Set[str]] = None,
    seen_global: Optional[Set[str]] = None,
) -> bool:
    """Record a symlink so the manifest renderer can recreate it later.

    A path already present in ``seen_global`` or ``seen_role`` is skipped
    silently. Otherwise the path is checked against ``path_filter`` and the
    policy; a rejection, a non-symlink path, or an unreadable link appends an
    ``ExcludedFile`` to ``excluded_out``. On success a ``ManagedLink`` holding
    the ``os.readlink`` target is appended to ``managed_out``. In every
    non-skipped case the path is added to whichever seen-sets were supplied.

    Returns:
        ``True`` only if a ``ManagedLink`` was appended.
    """
    for seen in (seen_global, seen_role):
        if seen is not None and abs_path in seen:
            return False

    def _remember() -> None:
        for seen in (seen_role, seen_global):
            if seen is not None:
                seen.add(abs_path)

    def _reject(why: str) -> bool:
        excluded_out.append(ExcludedFile(path=abs_path, reason=why))
        _remember()
        return False

    if path_filter.is_excluded(abs_path):
        return _reject("user_excluded")

    link_hook = getattr(policy, "deny_reason_link", None)
    if callable(link_hook):
        denied = link_hook(abs_path)
    else:
        # Fall back to the regular-file policy, discarding verdicts that only
        # complain about the path not being a regular file.
        # NOTE(review): assumes this filter belongs to the fallback branch
        # only - confirm against the original indentation.
        denied = policy.deny_reason(abs_path)
        if denied in ("not_regular_file", "not_file", "not_regular"):
            denied = None
    if denied:
        return _reject(denied)

    if not os.path.islink(abs_path):
        return _reject("not_symlink")

    try:
        link_target = os.readlink(abs_path)
    except OSError:
        return _reject("unreadable")

    managed_out.append(
        ManagedLink(path=abs_path, target=link_target, reason=reason)
    )
    _remember()
    return True

File diff suppressed because it is too large Load diff

View file

@ -4,8 +4,10 @@ import os
from dataclasses import dataclass
from typing import List, Optional, Set
from .. import harvest as h
from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot
from ..capture import capture_file
from ..harvest_types import ExcludedFile, ManagedFile, PackageSnapshot
from ..package_hints import package_section_from_installations
from ..system_paths import iter_matching_files
from .context import HarvestCollector
@ -97,10 +99,10 @@ class CronLogrotateCollector(HarvestCollector):
seen: Set[str] = set()
for spec in _CRON_CAPTURE_GLOBS:
for path in h._iter_matching_files(spec):
for path in iter_matching_files(spec):
if not os.path.isfile(path) or os.path.islink(path):
continue
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=self.cron_role_name,
abs_path=path,
@ -116,7 +118,7 @@ class CronLogrotateCollector(HarvestCollector):
return PackageSnapshot(
package=cron_pkg,
role_name=self.cron_role_name,
section=h._package_section_from_installations(
section=package_section_from_installations(
self.context.installed_pkgs.get(cron_pkg, [])
),
managed_files=managed,
@ -131,10 +133,10 @@ class CronLogrotateCollector(HarvestCollector):
seen: Set[str] = set()
for spec in _LOGROTATE_CAPTURE_GLOBS:
for path in h._iter_matching_files(spec):
for path in iter_matching_files(spec):
if not os.path.isfile(path) or os.path.islink(path):
continue
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=self.logrotate_role_name,
abs_path=path,
@ -150,7 +152,7 @@ class CronLogrotateCollector(HarvestCollector):
return PackageSnapshot(
package=logrotate_pkg,
role_name=self.logrotate_role_name,
section=h._package_section_from_installations(
section=package_section_from_installations(
self.context.installed_pkgs.get(logrotate_pkg, [])
),
managed_files=managed,

View file

@ -3,8 +3,14 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Set
from .. import harvest as h
from ..harvest import AptConfigSnapshot, DnfConfigSnapshot, ExcludedFile, ManagedFile
from ..capture import capture_file
from ..harvest_types import (
AptConfigSnapshot,
DnfConfigSnapshot,
ExcludedFile,
ManagedFile,
)
from ..system_paths import iter_apt_capture_paths, iter_dnf_capture_paths
from .context import HarvestCollector, HarvestContext
@ -36,8 +42,8 @@ class PackageManagerConfigCollector(HarvestCollector):
if self.context.backend.name == "dpkg":
apt_role_seen = self.seen_by_role.setdefault(apt_role_name, set())
for path, reason in h._iter_apt_capture_paths():
h._capture_file(
for path, reason in iter_apt_capture_paths():
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=apt_role_name,
abs_path=path,
@ -51,8 +57,8 @@ class PackageManagerConfigCollector(HarvestCollector):
)
elif self.context.backend.name == "rpm":
dnf_role_seen = self.seen_by_role.setdefault(dnf_role_name, set())
for path, reason in h._iter_dnf_capture_paths():
h._capture_file(
for path, reason in iter_dnf_capture_paths():
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=dnf_role_name,
abs_path=path,

View file

@ -5,13 +5,15 @@ import os
from typing import Dict, List, Optional, Set
from .. import harvest as h
from ..harvest import (
from ..capture import capture_file
from ..harvest_types import (
ExcludedFile,
ExtraPathsSnapshot,
ManagedDir,
ManagedFile,
UsrLocalCustomSnapshot,
)
from ..system_paths import MAX_FILES_CAP
from ..pathfilter import expand_includes
from .context import HarvestCollector, HarvestContext
@ -38,13 +40,13 @@ class UsrLocalCustomCollector(HarvestCollector):
self._scan_tree(
"/usr/local/etc",
require_executable=False,
cap=h.MAX_FILES_CAP,
cap=MAX_FILES_CAP,
reason="usr_local_etc_custom",
)
self._scan_tree(
"/usr/local/bin",
require_executable=True,
cap=h.MAX_FILES_CAP,
cap=MAX_FILES_CAP,
reason="usr_local_bin_script",
)
return UsrLocalCustomSnapshot(
@ -86,7 +88,7 @@ class UsrLocalCustomCollector(HarvestCollector):
except ValueError:
continue
if h._capture_file(
if capture_file(
bundle_dir=self.context.bundle_dir,
role_name=self.role_name,
abs_path=path,
@ -147,7 +149,7 @@ class ExtraPathsCollector(HarvestCollector):
files, inc_notes = expand_includes(
self.context.path_filter.iter_include_patterns(),
exclude=self.context.path_filter,
max_files=h.MAX_FILES_CAP,
max_files=MAX_FILES_CAP,
)
included_files = files
self.notes.extend(inc_notes)
@ -156,7 +158,7 @@ class ExtraPathsCollector(HarvestCollector):
for path in included_files:
if path in self.already_all:
continue
if h._capture_file(
if capture_file(
bundle_dir=self.context.bundle_dir,
role_name=self.role_name,
abs_path=path,
@ -198,9 +200,9 @@ class ExtraPathsCollector(HarvestCollector):
if not os.path.isdir(root) or os.path.islink(root):
return
for dirpath, dirnames, _ in os.walk(root, followlinks=False):
if len(self.managed_dirs) >= h.MAX_FILES_CAP:
if len(self.managed_dirs) >= MAX_FILES_CAP:
self.notes.append(
f"Reached directory cap ({h.MAX_FILES_CAP}) while scanning {root}."
f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}."
)
return
dirpath = os.path.normpath(dirpath)

View file

@ -5,7 +5,7 @@ from dataclasses import dataclass
from typing import List, Optional
from .. import harvest as h
from ..harvest import FirewallRuntimeSnapshot, SysctlSnapshot
from ..harvest_types import FirewallRuntimeSnapshot, SysctlSnapshot
from .context import HarvestCollector, HarvestContext

View file

@ -6,7 +6,23 @@ from dataclasses import dataclass
from typing import Dict, List, Optional, Set
from .. import harvest as h
from ..harvest import ExcludedFile, ManagedFile, PackageSnapshot, ServiceSnapshot
from ..capture import capture_file, capture_link
from ..harvest_types import ExcludedFile, ManagedFile, PackageSnapshot, ServiceSnapshot
from ..package_hints import (
SHARED_ETC_TOPDIRS,
add_pkgs_from_etc_topdirs,
hint_names,
maybe_add_specific_paths,
package_section_from_installations,
role_name_from_pkg,
role_name_from_unit,
)
from ..system_paths import (
MAX_UNOWNED_FILES_PER_ROLE,
is_confish,
scan_unowned_under_roots,
topdirs_for_package,
)
from ..systemd import UnitQueryError
from .context import HarvestCollector, HarvestContext
from .cron_logrotate import CronLogrotateCollector, _is_cron_path, _is_logrotate_path
@ -80,7 +96,7 @@ class ServicePackageCollector(HarvestCollector):
enabled_services = [
u
for u in enabled_services
if h._role_name_from_unit(u) not in blocked_roles
if role_name_from_unit(u) not in blocked_roles
]
enabled_set = set(enabled_services)
@ -106,15 +122,15 @@ class ServicePackageCollector(HarvestCollector):
}
for unit in sorted(enabled_services, key=service_sort_key):
role = h._role_name_from_unit(unit)
role = role_name_from_unit(unit)
parent_unit = parent_unit_for.get(unit)
parent_role = h._role_name_from_unit(parent_unit) if parent_unit else None
parent_role = role_name_from_unit(parent_unit) if parent_unit else None
try:
ui = h.get_unit_info(unit)
except UnitQueryError as e:
self.service_role_aliases.setdefault(
role, h._hint_names(unit, set()) | {role}
role, hint_names(unit, set()) | {role}
)
self.seen_by_role.setdefault(role, set())
managed = self.managed_by_role.setdefault(role, [])
@ -164,11 +180,11 @@ class ServicePackageCollector(HarvestCollector):
elif env_file.startswith("/etc/") and os.path.isfile(env_file):
candidates[env_file] = "systemd_envfile"
hints = h._hint_names(unit, pkgs)
h._add_pkgs_from_etc_topdirs(hints, self.context.topdir_to_pkgs, pkgs)
hints = hint_names(unit, pkgs)
add_pkgs_from_etc_topdirs(hints, self.context.topdir_to_pkgs, pkgs)
self.service_role_aliases[role] = set(hints) | set(pkgs) | {role}
for sp in h._maybe_add_specific_paths(hints, backend):
for sp in maybe_add_specific_paths(hints, backend):
if not os.path.exists(sp):
continue
if sp in self.context.etc_owner_map:
@ -193,26 +209,26 @@ class ServicePackageCollector(HarvestCollector):
confish_roots: List[str] = []
for hint in hints:
roots_for_hint = [f"/etc/{hint}", f"/etc/{hint}.d"]
if hint in h.SHARED_ETC_TOPDIRS:
if hint in SHARED_ETC_TOPDIRS:
confish_roots.extend(roots_for_hint)
else:
any_roots.extend(roots_for_hint)
found: List[str] = []
found.extend(
h._scan_unowned_under_roots(
scan_unowned_under_roots(
any_roots,
self.context.owned_etc,
limit=h.MAX_UNOWNED_FILES_PER_ROLE,
limit=MAX_UNOWNED_FILES_PER_ROLE,
confish_only=False,
)
)
if len(found) < h.MAX_UNOWNED_FILES_PER_ROLE:
if len(found) < MAX_UNOWNED_FILES_PER_ROLE:
found.extend(
h._scan_unowned_under_roots(
scan_unowned_under_roots(
confish_roots,
self.context.owned_etc,
limit=h.MAX_UNOWNED_FILES_PER_ROLE - len(found),
limit=MAX_UNOWNED_FILES_PER_ROLE - len(found),
confish_only=True,
)
)
@ -236,7 +252,7 @@ class ServicePackageCollector(HarvestCollector):
dest_managed = self.managed_by_role.setdefault(dest_role, [])
dest_excluded = self.excluded_by_role.setdefault(dest_role, [])
dest_seen = self.seen_by_role.setdefault(dest_role, set())
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=dest_role,
abs_path=path,
@ -305,7 +321,7 @@ class ServicePackageCollector(HarvestCollector):
if snap is not None:
role_seen = self.seen_by_role.setdefault(snap.role_name, set())
for path in timer_paths:
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=snap.role_name,
abs_path=path,
@ -374,7 +390,7 @@ class ServicePackageCollector(HarvestCollector):
manual_pkgs_skipped.append(pkg)
continue
role = h._role_name_from_pkg(pkg)
role = role_name_from_pkg(pkg)
notes: List[str] = []
excluded: List[ExcludedFile] = []
managed: List[ManagedFile] = []
@ -395,19 +411,19 @@ class ServicePackageCollector(HarvestCollector):
continue
candidates.setdefault(path, reason)
topdirs = h._topdirs_for_package(pkg, self.context.pkg_to_etc_paths)
topdirs = topdirs_for_package(pkg, self.context.pkg_to_etc_paths)
roots: List[str] = []
for topdir in sorted(topdirs):
if topdir in h.SHARED_ETC_TOPDIRS:
if topdir in SHARED_ETC_TOPDIRS:
continue
if backend.is_pkg_config_path(
f"/etc/{topdir}/"
) or backend.is_pkg_config_path(f"/etc/{topdir}"):
continue
roots.extend([f"/etc/{topdir}", f"/etc/{topdir}.d"])
roots.extend(h._maybe_add_specific_paths(set(topdirs), backend))
roots.extend(maybe_add_specific_paths(set(topdirs), backend))
for pth in h._scan_unowned_under_roots(
for pth in scan_unowned_under_roots(
[r for r in roots if os.path.isdir(r)],
self.context.owned_etc,
confish_only=False,
@ -416,12 +432,12 @@ class ServicePackageCollector(HarvestCollector):
for root in roots:
if os.path.isfile(root) and not os.path.islink(root):
if root not in self.context.owned_etc and h._is_confish(root):
if root not in self.context.owned_etc and is_confish(root):
candidates.setdefault(root, "custom_specific_path")
role_seen = self.seen_by_role.setdefault(role, set())
for path, reason in sorted(candidates.items()):
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=role,
abs_path=path,
@ -445,7 +461,7 @@ class ServicePackageCollector(HarvestCollector):
PackageSnapshot(
package=pkg,
role_name=role,
section=h._package_section_from_installations(
section=package_section_from_installations(
self.context.installed_pkgs.get(pkg, [])
),
managed_files=managed,
@ -490,7 +506,7 @@ class ServicePackageCollector(HarvestCollector):
for pth in sorted(glob.glob(os.path.join(directory, "*"))):
if not os.path.islink(pth):
continue
h._capture_link(
capture_link(
role_name=role_name,
abs_path=pth,
reason="enabled_symlink",

View file

@ -4,7 +4,8 @@ from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Set
from .. import harvest as h
from ..harvest import (
from ..capture import capture_file, capture_user_shell_dotfiles
from ..harvest_types import (
ExcludedFile,
FlatpakSnapshot,
ManagedFile,
@ -104,7 +105,7 @@ class UsersCollector(HarvestCollector):
if ssh_file.endswith("/authorized_keys")
else "ssh_public_key"
)
h._capture_file(
capture_file(
bundle_dir=self.context.bundle_dir,
role_name=users_role_name,
abs_path=ssh_file,
@ -121,7 +122,7 @@ class UsersCollector(HarvestCollector):
# often contain exported tokens or aliases/functions with embedded secrets.
home = (user.home or "").rstrip("/")
if home and home.startswith("/"):
h._capture_user_shell_dotfiles(
capture_user_shell_dotfiles(
bundle_dir=self.context.bundle_dir,
role_name=users_role_name,
home=home,

165
enroll/harvest_types.py Normal file
View file

@ -0,0 +1,165 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ManagedFile:
    """A file captured into the bundle, with the metadata recorded for it."""

    # Path of the captured file (capture_file() stores its abs_path here).
    path: str
    # Location of the copy relative to the role's artifact directory;
    # capture_file() uses the path with its leading "/" stripped.
    src_rel: str
    # Ownership and permission mode, held as strings.
    owner: str
    group: str
    mode: str
    # Short tag naming why the file was captured (e.g. "user_shell_rc").
    reason: str
@dataclass
class ManagedLink:
    """A symlink we want to materialise on the target host.

    For configuration enablement patterns (e.g. sites-enabled), the symlink is
    meaningful state even when the link target is captured elsewhere.
    """

    # Path of the symlink itself.
    path: str
    # Link target; capture_link() stores the os.readlink() result here.
    target: str
    # Short tag naming why the link was recorded (e.g. "enabled_symlink").
    reason: str
@dataclass
class ManagedDir:
    """A directory recorded together with its ownership and mode."""

    # Path of the directory.
    path: str
    # Ownership and permission mode, held as strings.
    owner: str
    group: str
    mode: str
    # Short tag naming why the directory was recorded.
    # NOTE(review): presumably parallels ManagedFile.reason - confirm.
    reason: str
@dataclass
class ExcludedFile:
    """A path that was considered during harvest but not captured."""

    # Path that was rejected.
    path: str
    # Why it was rejected; the capture helpers use "user_excluded",
    # "unreadable", "not_symlink", or whatever the ignore policy returned.
    reason: str
@dataclass
class ServiceSnapshot:
    """Harvest data collected for one service unit and its generated role."""

    # Unit name and the role name derived for it.
    unit: str
    role_name: str
    # Names of packages associated with the unit.
    packages: List[str]
    # NOTE(review): the four state fields presumably mirror what the service
    # manager reports for the unit; None when unavailable - confirm against
    # the collector.
    active_state: Optional[str]
    sub_state: Optional[str]
    unit_file_state: Optional[str]
    condition_result: Optional[str]
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    managed_links: List[ManagedLink] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class PackageSnapshot:
    """Harvest data collected for one package and its generated role."""

    # Package name and the role name derived for it.
    package: str
    role_name: str
    # Package grouping label; collectors fill it from
    # package_section_from_installations(), which may return None.
    section: Optional[str] = None
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    managed_links: List[ManagedLink] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
    has_config: bool = True  # False if package has no config/systemd/cron files
@dataclass
class UsersSnapshot:
    """Harvest data for user accounts and their per-user files."""

    role_name: str
    # One dict per user account.
    # NOTE(review): dict schema is not visible here - confirm in the collector.
    users: List[dict]
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
    # Flatpak data keyed by a string (presumably the user name - confirm),
    # plus the list of per-user flatpak remotes.
    user_flatpaks: Dict[str, List[Dict[str, Any]]] = field(default_factory=dict)
    user_flatpak_remotes: List[Dict[str, Any]] = field(default_factory=list)
@dataclass
class FlatpakSnapshot:
    """Harvest data for system-level flatpaks and their remotes."""

    role_name: str
    # One dict per system flatpak / remote.
    # NOTE(review): dict schemas are not visible here - confirm in the collector.
    system_flatpaks: List[Dict[str, Any]] = field(default_factory=list)
    remotes: List[Dict[str, Any]] = field(default_factory=list)
    # Free-form notes attached to the role.
    notes: List[str] = field(default_factory=list)
@dataclass
class SnapSnapshot:
    """Harvest data for system-level snaps."""

    role_name: str
    # One dict per system snap.
    # NOTE(review): dict schema is not visible here - confirm in the collector.
    system_snaps: List[Dict[str, Any]] = field(default_factory=list)
    # Free-form notes attached to the role.
    notes: List[str] = field(default_factory=list)
@dataclass
class AptConfigSnapshot:
    """Harvest data for APT configuration files.

    The package-manager collector fills this role when the backend is "dpkg".
    """

    role_name: str
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class DnfConfigSnapshot:
    """Harvest data for DNF configuration files.

    The package-manager collector fills this role when the backend is "rpm".
    """

    role_name: str
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class EtcCustomSnapshot:
    """Harvest data for a role holding custom files.

    NOTE(review): by its name this covers custom /etc content - confirm in the
    collector that builds it.
    """

    role_name: str
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class UsrLocalCustomSnapshot:
    """Harvest data for custom files found under /usr/local.

    The collector that returns this snapshot scans /usr/local/etc and
    /usr/local/bin.
    """

    role_name: str
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class ExtraPathsSnapshot:
    """Harvest data for paths pulled in by user-supplied include patterns."""

    role_name: str
    # Include/exclude patterns associated with this snapshot.
    # NOTE(review): presumably the raw user-supplied patterns - confirm.
    include_patterns: List[str] = field(default_factory=list)
    exclude_patterns: List[str] = field(default_factory=list)
    # Captured resources, rejected paths, and free-form notes for the role.
    managed_dirs: List[ManagedDir] = field(default_factory=list)
    managed_files: List[ManagedFile] = field(default_factory=list)
    managed_links: List[ManagedLink] = field(default_factory=list)
    excluded: List[ExcludedFile] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
@dataclass
class FirewallRuntimeSnapshot:
    """Harvest data describing the firewall's runtime state."""

    role_name: str
    # Package names associated with the role.
    packages: List[str] = field(default_factory=list)
    # NOTE(review): the *_save fields presumably hold the text produced by the
    # matching save commands, or None when not collected - confirm.
    ipset_save: Optional[str] = None
    ipset_sets: List[str] = field(default_factory=list)
    iptables_v4_save: Optional[str] = None
    iptables_v6_save: Optional[str] = None
    # Free-form notes attached to the role.
    notes: List[str] = field(default_factory=list)
@dataclass
class SysctlSnapshot:
    """Harvest data for sysctl settings."""

    role_name: str
    # Files captured for the role.
    managed_files: List[ManagedFile] = field(default_factory=list)
    # String-to-string parameter mapping.
    # NOTE(review): presumably sysctl key -> value - confirm.
    parameters: Dict[str, str] = field(default_factory=dict)
    # Free-form notes attached to the role.
    notes: List[str] = field(default_factory=list)

126
enroll/package_hints.py Normal file
View file

@ -0,0 +1,126 @@
from __future__ import annotations
import re
from typing import Dict, List, Optional, Set
from .role_names import avoid_reserved_role_name
# Directories that are shared across many packages. Never attribute all unowned
# files in these trees to one single package.
# Entries are bare top-level names under /etc (no leading path);
# add_pkgs_from_etc_topdirs() skips any candidate directory listed here.
SHARED_ETC_TOPDIRS = {
    "apparmor.d",
    "apt",
    "cron.d",
    "cron.daily",
    "cron.weekly",
    "cron.monthly",
    "cron.hourly",
    "default",
    "init.d",
    "logrotate.d",
    "modprobe.d",
    "network",
    "pam.d",
    "ssh",
    "ssl",
    "sudoers.d",
    "sysctl.d",
    "systemd",
    # RPM-family shared trees
    "dnf",
    "yum",
    "yum.repos.d",
    "sysconfig",
    "pki",
    "firewalld",
}
def safe_name(s: str) -> str:
    """Return ``s`` with every character that is not alphanumeric or ``_``
    replaced by ``_`` (so ``-`` also becomes ``_``)."""
    return "".join(
        ch if (ch.isalnum() or ch == "_") else "_" for ch in s
    )
def role_id(raw: str) -> str:
    """Turn an arbitrary string into a lower-case snake_case identifier.

    Runs of non-alphanumeric characters become a single ``_``, a lower-case
    letter or digit followed by an upper-case letter is split with ``_``, and
    leading/trailing underscores are dropped. If the result does not start
    with a lower-case letter or underscore (including when it is empty), it is
    prefixed with ``r_``.
    """
    collapsed = re.sub(r"[^A-Za-z0-9]+", "_", raw)
    snake = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", collapsed).lower()
    ident = re.sub(r"_+", "_", snake).strip("_")
    starts_ok = re.match(r"^[a-z_]", ident) is not None
    return ident if starts_ok else "r_" + ident
def role_name_from_unit(unit: str) -> str:
    """Derive a role name from a unit name.

    A trailing ``.service`` is removed, the remainder is normalised with
    ``role_id`` and ``safe_name``, and the result is passed through
    ``avoid_reserved_role_name`` with the ``service`` prefix.
    """
    stem = unit.removesuffix(".service")
    candidate = safe_name(role_id(stem))
    return avoid_reserved_role_name(candidate, prefix="service")
def role_name_from_pkg(pkg: str) -> str:
    """Derive a role name from a package name.

    The name is normalised with ``safe_name`` and passed through
    ``avoid_reserved_role_name`` with the ``package`` prefix.
    """
    candidate = safe_name(pkg)
    return avoid_reserved_role_name(candidate, prefix="package")
def package_section_from_installations(
    installs: List[Dict[str, str]],
) -> Optional[str]:
    """Pick one stable grouping label from installed-package metadata.

    Each record contributes its ``section`` value, or its ``group`` value when
    ``section`` is missing or empty. Blank labels and the placeholders
    ``(none)``, ``none`` and ``unspecified`` (case-insensitive) are ignored.

    Returns:
        The lexicographically smallest remaining label, or ``None`` when no
        usable label exists.
    """
    placeholders = {"(none)", "none", "unspecified"}
    labels: Set[str] = set()
    for record in installs or []:
        label = (record.get("section") or record.get("group") or "").strip()
        if label and label.lower() not in placeholders:
            labels.add(label)
    return min(labels) if labels else None
def hint_names(unit: str, pkgs: Set[str]) -> Set[str]:
    """Build the set of name hints for a unit and its packages.

    The hints are: the unit name without a trailing ``.service``; for
    templated names, the part before ``@``; every package name; and, for any
    of those containing a dot, the part before the first dot. Empty strings
    are dropped.
    """
    stem = unit.removesuffix(".service")
    names = {stem, *pkgs}
    if "@" in stem:
        names.add(stem.partition("@")[0])
    dotted_prefixes = {name.split(".", 1)[0] for name in names if "." in name}
    return {name for name in names | dotted_prefixes if name}
def add_pkgs_from_etc_topdirs(
    hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str]
) -> None:
    """Grow ``pkgs`` in place using packages mapped to hint-named directories.

    For each hint, both ``<hint>`` and ``<hint>.d`` are looked up in
    ``topdir_to_pkgs``; directories listed in ``SHARED_ETC_TOPDIRS`` are
    skipped so shared trees never pull in unrelated packages.
    """
    for hint in hints:
        for topdir in (hint, f"{hint}.d"):
            if topdir not in SHARED_ETC_TOPDIRS:
                pkgs.update(topdir_to_pkgs.get(topdir, ()))
def maybe_add_specific_paths(hints: Set[str], backend) -> List[str]:
    """Return extra candidate paths for the given hints.

    The backend's ``specific_paths_for_hints`` is preferred so each platform
    can apply its own conventions. If that call raises for any reason, a
    best-effort Debian-style fallback is used: for every hint,
    ``/etc/default/<hint>``, ``/etc/init.d/<hint>`` and
    ``/etc/sysctl.d/<hint>.conf``.
    """
    try:
        return backend.specific_paths_for_hints(hints)
    except Exception:
        # Deliberately broad: any backend failure falls back to the defaults.
        return [
            candidate
            for name in hints
            for candidate in (
                f"/etc/default/{name}",
                f"/etc/init.d/{name}",
                f"/etc/sysctl.d/{name}.conf",
            )
        ]

View file

@ -4,7 +4,9 @@ import json
import re
import shutil
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
import yaml
from .cm import (
CMModule,
@ -87,6 +89,7 @@ class PuppetRole(CMModule):
bundle_dir: str,
artifact_role: str,
module_files_dir: Path,
file_prefix: Optional[str] = None,
) -> None:
for d in self.managed_dirs_from_snapshot(snap):
path = str(d.get("path") or "").strip()
@ -104,7 +107,11 @@ class PuppetRole(CMModule):
if not path or not src_rel:
continue
module_rel = _copy_artifact(
bundle_dir, artifact_role, src_rel, module_files_dir
bundle_dir,
artifact_role,
src_rel,
module_files_dir,
dst_prefix=file_prefix,
)
if not module_rel:
self.notes.append(
@ -203,17 +210,23 @@ def _resource(
def _copy_artifact(
bundle_dir: str, role: str, src_rel: str, dst_files_dir: Path
bundle_dir: str,
role: str,
src_rel: str,
dst_files_dir: Path,
*,
dst_prefix: Optional[str] = None,
) -> Optional[str]:
if not role or not src_rel:
return None
src = Path(bundle_dir) / "artifacts" / role / src_rel
if not src.is_file():
return None
dst = dst_files_dir / src_rel
module_rel = Path(dst_prefix or "") / src_rel
dst = dst_files_dir / module_rel
dst.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dst)
return Path(src_rel).as_posix()
return module_rel.as_posix()
def _source_uri(module_name: str, module_rel: str) -> str:
@ -237,6 +250,21 @@ def _add_flatpak_snap_notes(roles: Dict[str, Any], out: Dict[str, PuppetRole]) -
)
def _node_data_filename(fqdn: str) -> str:
"""Return a safe Hiera node-data filename for an FQDN/certname."""
name = str(fqdn or "").strip().replace("/", "_").replace("\\", "_")
return f"{name or 'node'}.yaml"
def _node_file_prefix(fqdn: str) -> str:
"""Return a safe module-files prefix for node-specific artifacts."""
name = re.sub(r"[^A-Za-z0-9_.-]+", "_", str(fqdn or "").strip())
name = name.strip("._-") or "node"
return f"nodes/{name}"
def _collect_puppet_roles(
state: Dict[str, Any],
bundle_dir: str,
@ -248,6 +276,7 @@ def _collect_puppet_roles(
roles = roles_from_state(state)
inventory_packages = inventory_packages_from_state(state)
use_common_modules = not fqdn and not no_common_roles
node_file_prefix = _node_file_prefix(fqdn) if fqdn else None
out: Dict[str, PuppetRole] = {}
def ensure_role(role_name: str) -> PuppetRole:
@ -275,6 +304,7 @@ def _collect_puppet_roles(
bundle_dir=bundle_dir,
artifact_role=str(snap.get("role_name") or key),
module_files_dir=module_files_dir,
file_prefix=node_file_prefix,
)
users_snap = roles.get("users") or {}
@ -289,6 +319,7 @@ def _collect_puppet_roles(
bundle_dir=bundle_dir,
artifact_role=str(users_snap.get("role_name") or "users"),
module_files_dir=modules_dir / prole.module_name / "files",
file_prefix=node_file_prefix,
)
for svc in roles.get("services", []) or []:
@ -319,6 +350,7 @@ def _collect_puppet_roles(
bundle_dir=bundle_dir,
artifact_role=str(svc.get("role_name") or original_role_name),
module_files_dir=modules_dir / prole.module_name / "files",
file_prefix=node_file_prefix,
)
for pkg in roles.get("packages", []) or []:
@ -342,6 +374,7 @@ def _collect_puppet_roles(
bundle_dir=bundle_dir,
artifact_role=str(pkg.get("role_name") or original_role_name),
module_files_dir=modules_dir / prole.module_name / "files",
file_prefix=node_file_prefix,
)
fw = roles.get("firewall_runtime") or {}
@ -489,6 +522,164 @@ def _render_role_class(prole: PuppetRole) -> str:
return "\n".join(lines)
def _attrs_with_ensure(attrs: Dict[str, Any], ensure: str) -> Dict[str, Any]:
out = {"ensure": ensure}
out.update(attrs)
return out
def _role_hiera_values(prole: PuppetRole) -> Dict[str, Any]:
"""Return Automatic Parameter Lookup data for one generated module."""
data: Dict[str, Any] = {}
prefix = f"{prole.module_name}::"
if prole.packages:
data[f"{prefix}packages"] = sorted(prole.packages)
if prole.groups:
data[f"{prefix}groups"] = {
group: {"ensure": "present"} for group in sorted(prole.groups)
}
if prole.users:
users: Dict[str, Dict[str, Any]] = {}
for name in sorted(prole.users):
user = prole.users[name]
attrs: Dict[str, Any] = {"ensure": "present", "managehome": True}
if user.get("uid") is not None:
attrs["uid"] = user["uid"]
if user.get("primary_group"):
attrs["gid"] = user["primary_group"]
if user.get("home"):
attrs["home"] = user["home"]
if user.get("shell"):
attrs["shell"] = user["shell"]
if user.get("gecos"):
attrs["comment"] = user["gecos"]
if user.get("supplementary_groups"):
attrs["groups"] = list(user["supplementary_groups"])
attrs["membership"] = "minimum"
users[name] = attrs
data[f"{prefix}users"] = users
if prole.dirs:
data[f"{prefix}dirs"] = {
path: _attrs_with_ensure(prole.dirs[path], "directory")
for path in sorted(prole.dirs)
}
if prole.files:
data[f"{prefix}files"] = {
path: _attrs_with_ensure(prole.files[path], "file")
for path in sorted(prole.files)
}
if prole.links:
data[f"{prefix}links"] = {
path: _attrs_with_ensure(prole.links[path], "link")
for path in sorted(prole.links)
}
if prole.services:
data[f"{prefix}services"] = {
name: {
"ensure": prole.services[name].get("ensure") or "stopped",
"enable": bool(prole.services[name].get("enable")),
}
for name in sorted(prole.services)
}
if prole.notes:
data[f"{prefix}notes"] = list(prole.notes)
if "/etc/sysctl.d/99-enroll.conf" in prole.files:
data[f"{prefix}sysctl_apply"] = True
data[f"{prefix}sysctl_ignore_apply_errors"] = True
return data
def _render_hiera_role_class(prole: PuppetRole) -> str:
    """Render a reusable, data-driven Puppet class for --fqdn/Hiera mode.

    The class body contains no host-specific resources: it only iterates over
    its parameters, which Hiera Automatic Parameter Lookup fills in per node.
    Only ``prole.module_name`` is read from ``prole``.

    Returns the manifest text, terminated by a newline.
    """
    lines: List[str] = [
        "# Generated by Enroll from harvest state.",
        "# Resource data is supplied by Hiera Automatic Parameter Lookup.",
        f"class {prole.module_name} (",
        " Array[String] $packages = [],",
        " Hash[String, Hash] $groups = {},",
        " Hash[String, Hash] $users = {},",
        " Hash[String, Hash] $dirs = {},",
        " Hash[String, Hash] $files = {},",
        " Hash[String, Hash] $links = {},",
        " Hash[String, Hash] $services = {},",
        " Array[String] $notes = [],",
        " Boolean $sysctl_apply = true,",
        " Boolean $sysctl_ignore_apply_errors = true,",
        ") {",
        "",
        " $packages.each |String $package_name| {",
        " package { $package_name:",
        " ensure => 'installed',",
        " }",
        " }",
        "",
        " $groups.each |String $resource_title, Hash $attrs| {",
        " group { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        " $users.each |String $resource_title, Hash $attrs| {",
        " user { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        " $dirs.each |String $resource_title, Hash $attrs| {",
        " file { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        " $files.each |String $resource_title, Hash $attrs| {",
        " file { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        " $links.each |String $resource_title, Hash $attrs| {",
        " file { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        " $services.each |String $resource_title, Hash $attrs| {",
        " service { $resource_title:",
        " * => $attrs,",
        " }",
        " }",
        "",
        # Use the core ``in`` operator for the hash-key test: ``has_key`` comes
        # from puppetlabs-stdlib (deprecated there in favour of ``in``), so
        # relying on it would make the generated module depend on stdlib.
        " if $sysctl_apply and ('/etc/sysctl.d/99-enroll.conf' in $files) {",
        " exec { 'enroll-apply-sysctl':",
        " command => $sysctl_ignore_apply_errors ? {",
        " true => \"/bin/sh -c 'sysctl -e -p /etc/sysctl.d/99-enroll.conf || true'\",",
        " default => 'sysctl -e -p /etc/sysctl.d/99-enroll.conf',",
        " },",
        " path => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],",
        " refreshonly => true,",
        " subscribe => File['/etc/sysctl.d/99-enroll.conf'],",
        " }",
        " }",
        "",
        " # Generated notes are supplied through the $notes parameter for review.",
        "}",
        "",
    ]
    return "\n".join(lines)
def _render_site_pp(puppet_roles: List[PuppetRole], fqdn: Optional[str]) -> str:
node_name = _pp_quote(fqdn) if fqdn else "default"
if not puppet_roles:
@ -497,6 +688,91 @@ def _render_site_pp(puppet_roles: List[PuppetRole], fqdn: Optional[str]) -> str:
return f"node {node_name} {{\n{includes}\n}}\n"
def _render_hiera_site_pp(node_names: List[str]) -> str:
lines: List[str] = [
"# Generated by Enroll from harvest state.",
"# Per-node class lists and resources are read from Hiera data.",
"",
]
for node_name in node_names:
lines.extend(
[
f"node {_pp_quote(node_name)} {{",
" $enroll_classes = lookup('enroll::classes', Array[String], 'unique', [])",
" $enroll_classes.each |String $enroll_class| {",
" include $enroll_class",
" }",
"}",
"",
]
)
lines.extend(
[
"node default {",
" $enroll_classes = lookup('enroll::classes', Array[String], 'unique', [])",
" $enroll_classes.each |String $enroll_class| {",
" include $enroll_class",
" }",
"}",
"",
]
)
return "\n".join(lines)
def _render_hiera_yaml() -> str:
    """Return the text of a Hiera version 5 ``hiera.yaml``.

    The hierarchy is, in lookup order: node data keyed by trusted certname,
    node data keyed by the networking FQDN fact, then ``common.yaml``. Key
    order is preserved in the output (``sort_keys=False``).
    """
    levels = (
        ("Enroll trusted certname node data", "nodes/%{trusted.certname}.yaml"),
        ("Enroll networking FQDN node data", "nodes/%{facts.networking.fqdn}.yaml"),
        ("Enroll common data", "common.yaml"),
    )
    config = {
        "version": 5,
        "defaults": {"datadir": "data", "data_hash": "yaml_data"},
        "hierarchy": [{"name": label, "path": location} for label, location in levels],
    }
    return yaml.safe_dump(config, sort_keys=False, explicit_start=True)
def _write_yaml(path: Path, data: Dict[str, Any]) -> None:
    """Serialise ``data`` as YAML to ``path``, creating parent directories.

    Keys are sorted and the document begins with an explicit ``---`` marker.
    Any existing file at ``path`` is overwritten.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    document = yaml.safe_dump(data, sort_keys=True, explicit_start=True)
    path.write_text(document, encoding="utf-8")
def _write_hiera_node_data(
    out: Path, fqdn: str, puppet_roles: List[PuppetRole]
) -> Path:
    """Write the per-node Hiera data file and return its path.

    The node file holds the ``enroll::classes`` list (module names in role
    order) plus each role's parameter data. ``data/common.yaml`` is created
    with an empty class list only if it does not exist yet, so an existing
    file is left untouched.
    """
    payload: Dict[str, Any] = {
        "enroll::classes": [role.module_name for role in puppet_roles]
    }
    for role in puppet_roles:
        payload.update(_role_hiera_values(role))

    data_root = out / "data"
    node_file = data_root / "nodes" / _node_data_filename(fqdn)
    _write_yaml(node_file, payload)

    fallback_file = data_root / "common.yaml"
    if not fallback_file.exists():
        _write_yaml(fallback_file, {"enroll::classes": []})

    return node_file
def _hiera_node_names(out: Path) -> List[str]:
    """List the node names that have a data file under ``out/data/nodes``.

    A node name is the ``*.yaml`` file name with the trailing ``.yaml``
    removed. The result is sorted; it is empty when the directory is missing.
    """
    nodes_dir = out.joinpath("data", "nodes")
    if not nodes_dir.is_dir():
        return []
    suffix = ".yaml"
    return sorted(
        {entry.name[: -len(suffix)] for entry in nodes_dir.glob("*" + suffix)}
    )
def _write_metadata(module_dir: Path, module_name: str) -> None:
(module_dir / "metadata.json").write_text(
json.dumps(
@ -517,9 +793,16 @@ def _write_metadata(module_dir: Path, module_name: str) -> None:
)
def _render_readme(state: Dict[str, Any], puppet_roles: List[PuppetRole]) -> str:
def _render_readme(
state: Dict[str, Any],
puppet_roles: List[PuppetRole],
*,
fqdn: Optional[str] = None,
node_names: Optional[List[str]] = None,
) -> str:
host = state.get("host", {}) if isinstance(state.get("host"), dict) else {}
hostname = host.get("hostname") or "unknown"
hiera_mode = bool(fqdn)
role_lines = (
"\n".join(
f"- `{r.module_name}` from Enroll role `{r.role_name}`"
@ -527,11 +810,39 @@ def _render_readme(state: Dict[str, Any], puppet_roles: List[PuppetRole]) -> str
)
or "- None."
)
node_lines = "\n".join(f"- `{n}`" for n in (node_names or [])) or "- None."
notes: List[str] = []
for r in puppet_roles:
for note in r.notes:
notes.append(f"`{r.module_name}`: {note}")
notes_text = "\n".join(f"- {n}" for n in notes) or "- None."
if hiera_mode:
layout = f"""- `manifests/site.pp` declares node blocks and includes classes listed in Hiera key `enroll::classes`.
- `hiera.yaml` configures per-node lookup from `data/nodes/%{{trusted.certname}}.yaml` with a fallback to `data/common.yaml`.
- `data/nodes/{_node_data_filename(fqdn or '')}` contains this node's class list and class parameter data.
- `modules/<role>/manifests/init.pp` contains reusable, data-driven classes.
- `modules/<role>/files/nodes/<fqdn>/...` contains node-specific harvested file artifacts, avoiding clashes between hosts."""
apply = f"""Run from this generated output directory, passing the node certname so Hiera selects the right node data:
```bash
sudo puppet apply --modulepath ./modules --hiera_config ./hiera.yaml --certname {fqdn} manifests/site.pp --noop
```
For Puppet agent/control-repo use, place this output where `hiera.yaml`, `data/`, `manifests/`, and `modules/` form the environment root. Re-running Enroll with another `--fqdn` into the same output directory adds or replaces that node's YAML without deleting existing node data."""
else:
layout = """- `manifests/site.pp` declares a `node` block and includes the generated classes in manifest order.
- `modules/<role>/manifests/init.pp` contains resources for each generated Enroll role/snapshot or common package group.
- `modules/<role>/files/` contains harvested file artifacts for that role or group.
- Generated module names avoid Puppet reserved words such as `default`."""
apply = """Run from this generated output directory so Puppet can find `./modules`, or pass an absolute module path:
```bash
sudo puppet apply --modulepath ./modules manifests/site.pp --noop
```
```bash
sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/manifests/site.pp --noop
```"""
return f"""# Enroll Puppet manifest
Generated by Enroll from harvest data for `{hostname}`.
@ -540,10 +851,11 @@ This Puppet target reuses the existing harvest state without changing harvesting
## Layout
- `manifests/site.pp` declares a `node` block and includes the generated classes in manifest order.
- `modules/<role>/manifests/init.pp` contains resources for each generated Enroll role/snapshot or common package group.
- `modules/<role>/files/` contains harvested file artifacts for that role or group.
- Generated module names avoid Puppet reserved words such as `default`.
{layout}
## Known nodes
{node_lines if hiera_mode else '- Non-Hiera single-node output.'}
## Generated modules
@ -551,15 +863,7 @@ This Puppet target reuses the existing harvest state without changing harvesting
## Apply / check
Run from this generated output directory so Puppet can find `./modules`, or pass an absolute module path:
```bash
sudo puppet apply --modulepath ./modules manifests/site.pp --noop
```
```bash
sudo puppet apply --modulepath /path/to/generated/modules /path/to/generated/manifests/site.pp --noop
```
{apply}
## Generated resources
@ -607,7 +911,8 @@ class PuppetManifestRenderer:
state = PuppetRole.load_state(bundle_dir)
out = Path(out_dir)
if out.exists():
hiera_mode = bool(fqdn)
if out.exists() and not hiera_mode:
shutil.rmtree(out)
manifests_dir = out / "manifests"
modules_dir = out / "modules"
@ -628,15 +933,35 @@ class PuppetManifestRenderer:
module_manifests.mkdir(parents=True, exist_ok=True)
module_files.mkdir(parents=True, exist_ok=True)
(module_manifests / "init.pp").write_text(
_render_role_class(prole), encoding="utf-8"
(
_render_hiera_role_class(prole)
if hiera_mode
else _render_role_class(prole)
),
encoding="utf-8",
)
_write_metadata(module_dir, prole.module_name)
node_names: List[str] = []
if hiera_mode and fqdn:
(out / "hiera.yaml").write_text(_render_hiera_yaml(), encoding="utf-8")
_write_hiera_node_data(out, fqdn, puppet_roles)
node_names = _hiera_node_names(out)
(manifests_dir / "site.pp").write_text(
_render_hiera_site_pp(node_names), encoding="utf-8"
)
else:
(manifests_dir / "site.pp").write_text(
_render_site_pp(puppet_roles, fqdn), encoding="utf-8"
)
(out / "README.md").write_text(
_render_readme(state, puppet_roles), encoding="utf-8"
_render_readme(
state,
puppet_roles,
fqdn=fqdn,
node_names=node_names,
),
encoding="utf-8",
)

313
enroll/system_paths.py Normal file
View file

@ -0,0 +1,313 @@
from __future__ import annotations
import glob
import os
import re
from typing import Dict, List, Set, Tuple
# File extensions (as returned by os.path.splitext, including the leading dot)
# that is_confish() accepts as configuration-like.
ALLOWED_UNOWNED_EXTS = {
    ".cfg",
    ".cnf",
    ".conf",
    ".ini",
    ".json",
    ".link",
    ".mount",
    ".netdev",
    ".network",
    ".path",
    ".rules",
    ".service",
    ".socket",
    ".target",
    ".timer",
    ".toml",
    ".yaml",
    ".yml",
    "",  # allow extensionless (common in /etc/default and /etc/init.d)
}
# Default ``cap`` of iter_matching_files(): the most paths one spec may yield.
MAX_FILES_CAP = 4000
# Default ``limit`` of scan_unowned_under_roots().
MAX_UNOWNED_FILES_PER_ROLE = 500
def is_confish(path: str) -> bool:
    """Return True when the extension of ``path`` is in ALLOWED_UNOWNED_EXTS.

    Only the file name is inspected; the file does not need to exist. The
    comparison is case-sensitive, and a name without an extension counts as
    the empty extension.
    """
    extension = os.path.splitext(os.path.basename(path))[1]
    return extension in ALLOWED_UNOWNED_EXTS
def scan_unowned_under_roots(
    roots: List[str],
    owned_etc: Set[str],
    limit: int = MAX_UNOWNED_FILES_PER_ROLE,
    *,
    confish_only: bool = True,
) -> List[str]:
    """Collect regular files under ``roots`` that are not in ``owned_etc``.

    Only paths starting with ``/etc/`` are considered. Symlinks and
    non-regular files are skipped, and with ``confish_only`` set the file must
    also satisfy is_confish(). Scanning stops as soon as ``limit`` paths have
    been gathered. Results are in os.walk order, not sorted.
    """

    def eligible(candidate: str) -> bool:
        # Cheap string checks first, filesystem checks afterwards.
        if not candidate.startswith("/etc/"):
            return False
        if candidate in owned_etc:
            return False
        if not (os.path.isfile(candidate) and not os.path.islink(candidate)):
            return False
        return not confish_only or is_confish(candidate)

    hits: List[str] = []
    for base in roots:
        if not os.path.isdir(base):
            continue
        for parent, _subdirs, names in os.walk(base):
            if len(hits) >= limit:
                return hits
            for name in names:
                if len(hits) >= limit:
                    return hits
                candidate = os.path.join(parent, name)
                if eligible(candidate):
                    hits.append(candidate)
    return hits
def topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Set[str]:
    """Return the first path component below ``etc`` for each of ``pkg``'s paths.

    For example ``/etc/nginx/nginx.conf`` contributes ``"nginx"``. Paths whose
    second component is not ``etc``, or that have nothing after it, contribute
    nothing. An unknown package yields an empty set.
    """
    found: Set[str] = set()
    for etc_path in pkg_to_etc_paths.get(pkg, []):
        segments = etc_path.split("/", 3)
        if len(segments) < 3:
            continue
        if segments[1] != "etc" or not segments[2]:
            continue
        found.add(segments[2])
    return found
# Glob specs for APT source definitions; expanded by iter_apt_capture_paths().
_APT_SOURCE_GLOBS = [
    "/etc/apt/sources.list",
    "/etc/apt/sources.list.d/*.list",
    "/etc/apt/sources.list.d/*.sources",
]
# (glob spec, reason) pairs expanded by iter_system_capture_paths(). When a
# path matches more than one spec, the reason of the earliest spec is kept.
_SYSTEM_CAPTURE_GLOBS: List[Tuple[str, str]] = [
    ("/etc/fstab", "system_mounts"),
    ("/etc/crypttab", "system_mounts"),
    ("/etc/sysctl.conf", "system_sysctl"),
    ("/etc/sysctl.d/*", "system_sysctl"),
    ("/etc/modprobe.d/*", "system_modprobe"),
    ("/etc/modules", "system_modprobe"),
    ("/etc/modules-load.d/*", "system_modprobe"),
    ("/etc/netplan/*", "system_network"),
    ("/etc/systemd/network/*", "system_network"),
    ("/etc/network/interfaces", "system_network"),
    ("/etc/network/interfaces.d/*", "system_network"),
    ("/etc/resolvconf.conf", "system_network"),
    ("/etc/resolvconf/resolv.conf.d/*", "system_network"),
    ("/etc/NetworkManager/system-connections/*", "system_network"),
    ("/etc/sysconfig/network*", "system_network"),
    ("/etc/sysconfig/network-scripts/*", "system_network"),
    ("/etc/nftables.conf", "system_firewall"),
    ("/etc/nftables.d/*", "system_firewall"),
    ("/etc/iptables/rules.v4", "system_firewall"),
    ("/etc/iptables/rules.v6", "system_firewall"),
    ("/etc/sysconfig/iptables", "system_firewall"),
    ("/etc/sysconfig/ip6tables", "system_firewall"),
    ("/etc/ipset.conf", "system_firewall"),
    ("/etc/ipset/*", "system_firewall"),
    ("/etc/ipset.d/*", "system_firewall"),
    ("/etc/sysconfig/ipset", "system_firewall"),
    ("/etc/default/ipset", "system_firewall"),
    ("/etc/ufw/*", "system_firewall"),
    ("/etc/default/ufw", "system_firewall"),
    ("/etc/firewalld/*", "system_firewall"),
    ("/etc/firewalld/zones/*", "system_firewall"),
    ("/etc/selinux/config", "system_security"),
    ("/etc/rc.local", "system_rc"),
]
# Persisted IPv4 iptables rule files; exposed via persistent_iptables_v4_globs().
_PERSISTENT_IPTABLES_V4_GLOBS = [
    "/etc/iptables/rules.v4",
    "/etc/sysconfig/iptables",
]
# Persisted IPv6 iptables rule files; exposed via persistent_iptables_v6_globs().
_PERSISTENT_IPTABLES_V6_GLOBS = [
    "/etc/iptables/rules.v6",
    "/etc/sysconfig/ip6tables",
]
# Persisted ipset definitions; exposed via persistent_ipset_globs().
# NOTE(review): "/etc/default/ipset" is captured by _SYSTEM_CAPTURE_GLOBS but
# is not listed here - confirm that is intentional.
_PERSISTENT_IPSET_GLOBS = [
    "/etc/ipset.conf",
    "/etc/ipset/*",
    "/etc/ipset.d/*",
    "/etc/sysconfig/ipset",
]
def persistent_ipset_globs() -> List[str]:
    """Return a fresh copy of the persistent ipset glob specs."""
    return _PERSISTENT_IPSET_GLOBS.copy()
def persistent_iptables_v4_globs() -> List[str]:
    """Return a fresh copy of the persistent IPv4 iptables glob specs."""
    return _PERSISTENT_IPTABLES_V4_GLOBS.copy()
def persistent_iptables_v6_globs() -> List[str]:
    """Return a fresh copy of the persistent IPv6 iptables glob specs."""
    return _PERSISTENT_IPTABLES_V6_GLOBS.copy()
def persistent_firewall_files(globs: List[str]) -> List[str]:
    """Return the sorted, de-duplicated files matched by any spec in ``globs``.

    Each spec is expanded with iter_matching_files(), so directories are
    walked and symlinks are skipped.
    """
    unique_paths: Set[str] = {
        path for spec in globs for path in iter_matching_files(spec)
    }
    return sorted(unique_paths)
def iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]:
    """Expand ``spec`` with glob and return the regular files it leads to.

    A matched regular file is returned as-is. A matched directory is walked
    recursively and its regular files are returned. Symlinks are skipped at
    both levels. At most ``cap`` paths are returned, in discovery order (not
    sorted).
    """
    matches: List[str] = []

    def full() -> bool:
        return len(matches) >= cap

    for candidate in glob.glob(spec):
        if full():
            break
        if os.path.islink(candidate):
            continue
        if os.path.isfile(candidate):
            matches.append(candidate)
        elif os.path.isdir(candidate):
            for parent, _subdirs, names in os.walk(candidate):
                for name in names:
                    if full():
                        break
                    child = os.path.join(parent, name)
                    if not os.path.islink(child) and os.path.isfile(child):
                        matches.append(child)
                # Stop descending once the cap has been reached.
                if full():
                    break
    return matches
def parse_apt_signed_by(source_files: List[str]) -> Set[str]:
    """Return absolute keyring paths referenced via signed-by / Signed-By.

    Two notations are recognised, both case-insensitively:

    * a ``Signed-By: <value>`` header line, ignored when the value starts
      with ``|``;
    * a ``signed-by=<value>`` option, read from inside the first ``[...]``
      group when the line has one, otherwise from the whole line.

    Values are split on whitespace and commas, and only tokens starting with
    ``/`` are kept. Blank lines and ``#`` comments are skipped. A file that
    cannot be read is skipped without raising.
    """
    option_re = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE)
    header_re = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE)
    keyrings: Set[str] = set()

    def collect(value: str) -> None:
        # Keep only the absolute paths found in a separated value list.
        keyrings.update(
            token for token in re.split(r"[\s,]+", value) if token.startswith("/")
        )

    def collect_options(text: str) -> None:
        # Harvest every signed-by=... occurrence in ``text``, minus quotes.
        for hit in option_re.finditer(text):
            collect(hit.group(1).strip().strip("\"'"))

    for source in source_files:
        try:
            with open(source, "r", encoding="utf-8", errors="replace") as handle:
                for raw in handle:
                    entry = raw.strip()
                    if not entry or entry.startswith("#"):
                        continue
                    header = header_re.match(entry)
                    if header:
                        value = header.group(1).strip()
                        if not value.startswith("|"):
                            collect(value)
                    elif "[" in entry and "]" in entry:
                        collect_options(entry.split("[", 1)[1].split("]", 1)[0])
                    else:
                        collect_options(entry)
        except OSError:
            continue
    return keyrings
def iter_apt_capture_paths() -> List[Tuple[str, str]]:
    """Return (path, reason) pairs for APT configuration.

    Reasons are assigned in increasing priority, later steps overriding
    earlier ones for the same path:

    1. ``apt_config`` for every regular, non-symlink file under ``/etc/apt``;
    2. ``apt_source`` for files matching _APT_SOURCE_GLOBS;
    3. ``apt_keyring`` for the trusted keyring locations;
    4. keyrings named by signed-by in the source files: ``apt_keyring`` when
       under ``/etc/apt/``, ``apt_signed_by_keyring`` otherwise.

    The result is sorted by path.
    """
    reason_for: Dict[str, str] = {}

    if os.path.isdir("/etc/apt"):
        for parent, _subdirs, names in os.walk("/etc/apt"):
            for name in names:
                candidate = os.path.join(parent, name)
                if not os.path.islink(candidate) and os.path.isfile(candidate):
                    reason_for.setdefault(candidate, "apt_config")

    source_files = sorted(
        {path for pattern in _APT_SOURCE_GLOBS for path in iter_matching_files(pattern)}
    )
    for source in source_files:
        reason_for[source] = "apt_source"

    keyring_patterns = (
        "/etc/apt/trusted.gpg",
        "/etc/apt/trusted.gpg.d/*",
        "/etc/apt/keyrings/*",
    )
    for pattern in keyring_patterns:
        for keyring in iter_matching_files(pattern):
            reason_for[keyring] = "apt_keyring"

    for keyring in sorted(parse_apt_signed_by(source_files)):
        # Referenced keyrings must exist as regular files to be captured.
        if os.path.islink(keyring) or not os.path.isfile(keyring):
            continue
        inside_apt = keyring.startswith("/etc/apt/")
        reason_for[keyring] = "apt_keyring" if inside_apt else "apt_signed_by_keyring"

    return [(path, reason_for[path]) for path in sorted(reason_for)]
def iter_dnf_capture_paths() -> List[Tuple[str, str]]:
    """Return (path, reason) pairs for DNF/YUM configuration on RPM systems.

    Regular, non-symlink files under ``/etc/dnf`` and ``/etc/yum`` are tagged
    ``dnf_config`` / ``yum_config``. ``/etc/yum.conf``, the ``*.repo`` files
    and the RPM GPG keys are then tagged with their own reasons, overriding
    any earlier tag. The result is sorted by path.
    """
    reason_for: Dict[str, str] = {}

    config_trees = (("/etc/dnf", "dnf_config"), ("/etc/yum", "yum_config"))
    for tree, tag in config_trees:
        if not os.path.isdir(tree):
            continue
        for parent, _subdirs, names in os.walk(tree):
            for name in names:
                candidate = os.path.join(parent, name)
                if not os.path.islink(candidate) and os.path.isfile(candidate):
                    reason_for.setdefault(candidate, tag)

    # Applied in order, each overriding whatever an earlier step assigned.
    specific_patterns = (
        ("/etc/yum.conf", "yum_conf"),
        ("/etc/yum.repos.d/*.repo", "yum_repo"),
        ("/etc/pki/rpm-gpg/*", "rpm_gpg_key"),
    )
    for pattern, tag in specific_patterns:
        for path in iter_matching_files(pattern):
            reason_for[path] = tag

    return [(path, reason_for[path]) for path in sorted(reason_for)]
def iter_system_capture_paths() -> List[Tuple[str, str]]:
    """Return (path, reason) pairs for the files matched by _SYSTEM_CAPTURE_GLOBS.

    A path matched by several specs keeps the reason of the first spec that
    produced it. The result is sorted by path.
    """
    first_reason: Dict[str, str] = {}
    for spec, reason in _SYSTEM_CAPTURE_GLOBS:
        for path in iter_matching_files(spec):
            # setdefault keeps the earliest reason for a repeated path.
            first_reason.setdefault(path, reason)
    return sorted(first_reason.items(), key=lambda pair: pair[0])

View file

@ -5,21 +5,28 @@ import pytest
from pathlib import Path
import enroll.harvest as harvest
import enroll.system_paths as system_paths
from enroll.platform import PlatformInfo
from enroll.systemd import UnitInfo
from enroll.pathfilter import PathFilter
from enroll.harvest import (
_is_confish,
_hint_names,
_topdirs_for_package,
_iter_matching_files,
_parse_apt_signed_by,
_capture_link,
_capture_file,
ManagedFile,
ManagedLink,
ExcludedFile,
IgnorePolicy,
import enroll.capture as capture
from enroll.capture import (
capture_file as _capture_file,
capture_link as _capture_link,
capture_user_shell_dotfiles,
files_differ,
)
from enroll.harvest_types import ExcludedFile, ManagedFile, ManagedLink
from enroll.ignore import IgnorePolicy
from enroll.package_hints import (
add_pkgs_from_etc_topdirs,
hint_names as _hint_names,
)
from enroll.system_paths import (
is_confish as _is_confish,
iter_matching_files as _iter_matching_files,
parse_apt_signed_by as _parse_apt_signed_by,
topdirs_for_package as _topdirs_for_package,
)
from unittest.mock import MagicMock
@ -249,6 +256,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom(
return ("root", "root", "0644")
monkeypatch.setattr(harvest, "stat_triplet", fake_stat_triplet)
monkeypatch.setattr(capture, "stat_triplet", fake_stat_triplet)
# Avoid needing source files on disk by implementing our own bundle copier
def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str):
@ -256,7 +264,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom(
dst.parent.mkdir(parents=True, exist_ok=True)
dst.write_bytes(files.get(abs_path, b""))
monkeypatch.setattr(harvest, "_copy_into_bundle", fake_copy)
monkeypatch.setattr(capture, "copy_into_bundle", fake_copy)
state_path = harvest.harvest(str(bundle), policy=AllowAllPolicy())
st = json.loads(Path(state_path).read_text(encoding="utf-8"))
@ -327,8 +335,8 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic(
# Only include the cron snippet in the system capture set.
monkeypatch.setattr(
harvest,
"_iter_system_capture_paths",
system_paths,
"iter_system_capture_paths",
lambda: [("/etc/cron.d/ntpsec", "system_cron")],
)
@ -392,6 +400,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic(
monkeypatch.setattr(harvest, "get_backend", lambda info=None: backend)
monkeypatch.setattr(harvest, "stat_triplet", lambda p: ("root", "root", "0644"))
monkeypatch.setattr(capture, "stat_triplet", lambda p: ("root", "root", "0644"))
monkeypatch.setattr(harvest, "collect_non_system_users", lambda: [])
def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str):
@ -399,7 +408,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic(
dst.parent.mkdir(parents=True, exist_ok=True)
dst.write_bytes(files[abs_path])
monkeypatch.setattr(harvest, "_copy_into_bundle", fake_copy)
monkeypatch.setattr(capture, "copy_into_bundle", fake_copy)
state_path = harvest.harvest(str(bundle), policy=AllowAllPolicy())
st = json.loads(Path(state_path).read_text(encoding="utf-8"))
@ -421,7 +430,7 @@ def test_files_differ_binary(tmp_path: Path):
file2 = tmp_path / "file2.bin"
file1.write_bytes(b"\x00\x01\x02\x03")
file2.write_bytes(b"\x00\x01\x02\x03")
assert harvest._files_differ(str(file1), str(file2)) is False
assert files_differ(str(file1), str(file2)) is False
def test_files_differ_binary_different(tmp_path: Path):
@ -429,7 +438,7 @@ def test_files_differ_binary_different(tmp_path: Path):
file2 = tmp_path / "file2.bin"
file1.write_bytes(b"\x00\x01\x02\x03")
file2.write_bytes(b"\x00\x01\x02\x04")
assert harvest._files_differ(str(file1), str(file2)) is True
assert files_differ(str(file1), str(file2)) is True
def test_files_differ_non_regular_a(tmp_path: Path):
@ -437,14 +446,14 @@ def test_files_differ_non_regular_a(tmp_path: Path):
directory.mkdir()
file1 = tmp_path / "file1.txt"
file1.write_text("content", encoding="utf-8")
assert harvest._files_differ(str(directory), str(file1)) is True
assert files_differ(str(directory), str(file1)) is True
def test_topdirs_for_package_with_multiple_paths():
pkg_to_etc_paths = {
"nginx": ["/etc/nginx/nginx.conf", "/etc/nginx/sites-enabled/default"],
}
result = harvest._topdirs_for_package("nginx", pkg_to_etc_paths)
result = _topdirs_for_package("nginx", pkg_to_etc_paths)
assert result == {"nginx"}
@ -452,12 +461,12 @@ def test_topdirs_for_package_with_multiple_topdirs():
pkg_to_etc_paths = {
"multi": ["/etc/nginx/nginx.conf", "/etc/ssh/sshd_config"],
}
result = harvest._topdirs_for_package("multi", pkg_to_etc_paths)
result = _topdirs_for_package("multi", pkg_to_etc_paths)
assert result == {"nginx", "ssh"}
def test_topdirs_for_package_empty():
result = harvest._topdirs_for_package("empty", {})
result = _topdirs_for_package("empty", {})
assert result == set()
@ -465,7 +474,7 @@ def test_topdirs_for_package_no_etc():
pkg_to_etc_paths = {
"other": ["/usr/share/doc/file"],
}
result = harvest._topdirs_for_package("other", pkg_to_etc_paths)
result = _topdirs_for_package("other", pkg_to_etc_paths)
assert result == set()
@ -475,7 +484,7 @@ def test_files_differ_same_content(tmp_path: Path):
file_b = tmp_path / "b.txt"
file_a.write_text("same content", encoding="utf-8")
file_b.write_text("same content", encoding="utf-8")
assert harvest._files_differ(str(file_a), str(file_b)) is False
assert files_differ(str(file_a), str(file_b)) is False
def test_files_differ_different_content(tmp_path: Path):
@ -484,7 +493,7 @@ def test_files_differ_different_content(tmp_path: Path):
file_b = tmp_path / "b.txt"
file_a.write_text("content a", encoding="utf-8")
file_b.write_text("content b", encoding="utf-8")
assert harvest._files_differ(str(file_a), str(file_b)) is True
assert files_differ(str(file_a), str(file_b)) is True
def test_files_differ_missing_file(tmp_path: Path):
@ -492,7 +501,7 @@ def test_files_differ_missing_file(tmp_path: Path):
file_a = tmp_path / "a.txt"
file_a.write_text("content", encoding="utf-8")
file_b = tmp_path / "b.txt"
assert harvest._files_differ(str(file_a), str(file_b)) is True
assert files_differ(str(file_a), str(file_b)) is True
def test_files_differ_both_missing(tmp_path: Path):
@ -500,7 +509,7 @@ def test_files_differ_both_missing(tmp_path: Path):
file_a = tmp_path / "a.txt"
file_b = tmp_path / "b.txt"
# Both missing - should return True (they differ in the sense that neither exists)
assert harvest._files_differ(str(file_a), str(file_b)) is True
assert files_differ(str(file_a), str(file_b)) is True
def test_files_differ_non_regular_b(tmp_path: Path):
@ -510,7 +519,7 @@ def test_files_differ_non_regular_b(tmp_path: Path):
link_b = tmp_path / "link"
link_b.symlink_to(file_a)
# Symlinks are followed, so content is the same
assert harvest._files_differ(str(file_a), str(link_b)) is False
assert files_differ(str(file_a), str(link_b)) is False
def test_files_differ_oserror_on_read(tmp_path: Path, monkeypatch):
@ -524,7 +533,7 @@ def test_files_differ_oserror_on_read(tmp_path: Path, monkeypatch):
raise OSError("Permission denied")
monkeypatch.setattr("builtins.open", fake_open, raising=False)
assert harvest._files_differ(str(file_a), str(file_b)) is True
assert files_differ(str(file_a), str(file_b)) is True
def test_files_differ_large_file_returns_true(tmp_path: Path):
@ -536,7 +545,7 @@ def test_files_differ_large_file_returns_true(tmp_path: Path):
file_a.write_bytes(data)
file_b.write_bytes(data)
# Should return True because files are too large
assert harvest._files_differ(str(file_a), str(file_b), max_bytes=1_000_000) is True
assert files_differ(str(file_a), str(file_b), max_bytes=1_000_000) is True
def test_files_differ_size_mismatch(tmp_path: Path):
@ -545,7 +554,7 @@ def test_files_differ_size_mismatch(tmp_path: Path):
file_b = tmp_path / "b.txt"
file_a.write_text("short", encoding="utf-8")
file_b.write_text("much longer content here", encoding="utf-8")
assert harvest._files_differ(str(file_a), str(file_b)) is True
assert files_differ(str(file_a), str(file_b)) is True
def test_files_differ_large_files(tmp_path: Path):
@ -556,12 +565,12 @@ def test_files_differ_large_files(tmp_path: Path):
data = b"x" * 10000
file_a.write_bytes(data)
file_b.write_bytes(data)
assert harvest._files_differ(str(file_a), str(file_b)) is False
assert files_differ(str(file_a), str(file_b)) is False
def test_hint_names_with_unit_and_packages():
"""Test _hint_names extracts hints from unit and packages."""
result = harvest._hint_names("nginx.service", {"nginx-common", "nginx-core"})
result = _hint_names("nginx.service", {"nginx-common", "nginx-core"})
assert "nginx" in result
assert "nginx-common" in result
assert "nginx-core" in result
@ -569,20 +578,20 @@ def test_hint_names_with_unit_and_packages():
def test_hint_names_with_template_unit():
"""Test _hint_names handles template units."""
result = harvest._hint_names("getty@tty1.service", set())
result = _hint_names("getty@tty1.service", set())
assert "getty" in result
assert "getty@tty1" in result
def test_hint_names_with_dotted_unit():
"""Test _hint_names handles dotted unit names."""
result = harvest._hint_names("nginx.service", set())
result = _hint_names("nginx.service", set())
assert "nginx" in result
def test_hint_names_empty():
"""Test _hint_names with empty inputs."""
result = harvest._hint_names("", set())
result = _hint_names("", set())
assert result == set()
@ -594,7 +603,7 @@ def test_add_pkgs_from_etc_topdirs():
"ssh": {"openssh-server"},
}
pkgs = set()
harvest._add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs)
add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs)
# Should add packages from matching topdirs
assert "nginx-common" in pkgs or "nginx-core" in pkgs
@ -604,7 +613,7 @@ def test_add_pkgs_from_etc_topdirs_empty():
hints = set()
topdir_to_pkgs = {}
pkgs = set()
harvest._add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs)
add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs)
assert pkgs == set()
@ -612,47 +621,47 @@ def test_is_confish_with_conf(tmp_path: Path):
"""Test _is_confish recognizes .conf files."""
file1 = tmp_path / "test.conf"
file1.write_text("[Unit]", encoding="utf-8")
assert harvest._is_confish(str(file1)) is True
assert _is_confish(str(file1)) is True
def test_is_confish_with_yaml(tmp_path: Path):
"""Test _is_confish recognizes .yaml files."""
file1 = tmp_path / "test.yaml"
file1.write_text("key: value", encoding="utf-8")
assert harvest._is_confish(str(file1)) is True
assert _is_confish(str(file1)) is True
def test_is_confish_with_json(tmp_path: Path):
"""Test _is_confish recognizes .json files."""
file1 = tmp_path / "test.json"
file1.write_text('{"key": "value"}', encoding="utf-8")
assert harvest._is_confish(str(file1)) is True
assert _is_confish(str(file1)) is True
def test_is_confish_with_service(tmp_path: Path):
"""Test _is_confish recognizes .service files."""
file1 = tmp_path / "test.service"
file1.write_text("[Unit]", encoding="utf-8")
assert harvest._is_confish(str(file1)) is True
assert _is_confish(str(file1)) is True
def test_is_confish_with_extensionless(tmp_path: Path):
"""Test _is_confish recognizes extensionless config files."""
file1 = tmp_path / "default"
file1.write_text("OPTIONS=", encoding="utf-8")
assert harvest._is_confish(str(file1)) is True
assert _is_confish(str(file1)) is True
def test_is_confish_not_config(tmp_path: Path):
"""Test _is_confish rejects non-config files."""
file1 = tmp_path / "test.log"
file1.write_text("log", encoding="utf-8")
assert harvest._is_confish(str(file1)) is False
assert _is_confish(str(file1)) is False
def test_is_confish_nonexistent():
"""Test _is_confish returns False for nonexistent files."""
assert harvest._is_confish("/nonexistent/file.xyz") is False
assert _is_confish("/nonexistent/file.xyz") is False
"""Additional coverage tests for harvest.py"""
@ -1065,7 +1074,7 @@ def test_user_shell_dotfiles_are_not_auto_captured_without_dangerous(tmp_path: P
managed: list[ManagedFile] = []
excluded: list[ExcludedFile] = []
captured = harvest._capture_user_shell_dotfiles(
captured = capture_user_shell_dotfiles(
bundle_dir=str(tmp_path / "bundle"),
role_name="users",
home=str(home),
@ -1106,7 +1115,7 @@ def test_user_shell_dotfiles_dangerous_captures_changed_files_only(tmp_path: Pat
managed: list[ManagedFile] = []
excluded: list[ExcludedFile] = []
captured = harvest._capture_user_shell_dotfiles(
captured = capture_user_shell_dotfiles(
bundle_dir=str(tmp_path / "bundle"),
role_name="users",
home=str(home),

View file

@ -1,13 +1,10 @@
from __future__ import annotations
from enroll.harvest import (
FirewallRuntimeSnapshot,
HarvestContext,
IgnorePolicy,
PathFilter,
RuntimeStateCollector,
SysctlSnapshot,
)
from enroll.harvest_collectors.context import HarvestContext
from enroll.harvest_collectors.runtime import RuntimeStateCollector
from enroll.harvest_types import FirewallRuntimeSnapshot, SysctlSnapshot
from enroll.ignore import IgnorePolicy
from enroll.pathfilter import PathFilter
class _Backend:

View file

@ -4,6 +4,8 @@ import json
from pathlib import Path
import enroll.harvest as h
import enroll.capture as capture
import enroll.harvest_collectors.cron_logrotate as cron_logrotate
from enroll.platform import PlatformInfo
from enroll.systemd import UnitInfo
@ -89,7 +91,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles(
}
return list(mapping.get(spec, []))[:cap]
monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching)
monkeypatch.setattr(cron_logrotate, "iter_matching_files", fake_iter_matching)
# Avoid real system probing.
monkeypatch.setattr(
@ -128,7 +130,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles(
)
monkeypatch.setattr(h, "collect_non_system_users", lambda: [])
monkeypatch.setattr(
h,
capture,
"stat_triplet",
lambda p: ("alice" if "alice" in p else "root", "root", "0644"),
)
@ -139,7 +141,7 @@ def test_harvest_unifies_cron_and_logrotate_into_dedicated_package_roles(
dst.parent.mkdir(parents=True, exist_ok=True)
dst.write_bytes(files.get(abs_path, b""))
monkeypatch.setattr(h, "_copy_into_bundle", fake_copy)
monkeypatch.setattr(capture, "copy_into_bundle", fake_copy)
state_path = h.harvest(str(bundle), policy=AllowAllPolicy())
st = json.loads(Path(state_path).read_text(encoding="utf-8"))

View file

@ -4,6 +4,8 @@ import os
from pathlib import Path
import enroll.harvest as h
import enroll.system_paths as sp
from enroll.package_hints import role_name_from_pkg, role_name_from_unit
def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path: Path):
@ -24,12 +26,12 @@ def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path
str(root / "link"): "link",
}
monkeypatch.setattr(h.glob, "glob", lambda spec: [str(root), str(root / "link")])
monkeypatch.setattr(h.os.path, "islink", lambda p: paths.get(p) == "link")
monkeypatch.setattr(h.os.path, "isfile", lambda p: paths.get(p) == "file")
monkeypatch.setattr(h.os.path, "isdir", lambda p: paths.get(p) == "dir")
monkeypatch.setattr(sp.glob, "glob", lambda spec: [str(root), str(root / "link")])
monkeypatch.setattr(sp.os.path, "islink", lambda p: paths.get(p) == "link")
monkeypatch.setattr(sp.os.path, "isfile", lambda p: paths.get(p) == "file")
monkeypatch.setattr(sp.os.path, "isdir", lambda p: paths.get(p) == "dir")
monkeypatch.setattr(
h.os,
sp.os,
"walk",
lambda p: [
(str(root), ["sub"], ["real.txt", "link"]),
@ -37,7 +39,7 @@ def test_iter_matching_files_skips_symlinks_and_walks_dirs(monkeypatch, tmp_path
],
)
out = h._iter_matching_files("/whatever/*", cap=100)
out = sp.iter_matching_files("/whatever/*", cap=100)
assert str(root / "real.txt") in out
assert str(root / "sub" / "nested.txt") in out
assert str(root / "link") not in out
@ -57,7 +59,7 @@ def test_parse_apt_signed_by_extracts_keyrings(tmp_path: Path):
f3 = tmp_path / "c.sources"
f3.write_text("Signed-By: | /bin/echo nope\n", encoding="utf-8")
out = h._parse_apt_signed_by([str(f1), str(f2), str(f3)])
out = sp.parse_apt_signed_by([str(f1), str(f2), str(f3)])
assert "/usr/share/keyrings/foo.gpg" in out
assert "/etc/apt/keyrings/bar.gpg" in out
assert "/usr/share/keyrings/baz.gpg" in out
@ -74,9 +76,9 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch):
"/usr/share/keyrings/ext.gpg": "file",
}
monkeypatch.setattr(h.os.path, "isdir", lambda p: p in {"/etc/apt"})
monkeypatch.setattr(sp.os.path, "isdir", lambda p: p in {"/etc/apt"})
monkeypatch.setattr(
h.os,
sp.os,
"walk",
lambda root: [
("/etc/apt", ["apt.conf.d", "sources.list.d"], []),
@ -84,8 +86,8 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch):
("/etc/apt/sources.list.d", [], ["test.list"]),
],
)
monkeypatch.setattr(h.os.path, "islink", lambda p: False)
monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file")
monkeypatch.setattr(sp.os.path, "islink", lambda p: False)
monkeypatch.setattr(sp.os.path, "isfile", lambda p: files.get(p) == "file")
# Only treat the sources glob as having a hit.
def fake_iter_matching(spec: str, cap: int = 10000):
@ -93,7 +95,7 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch):
return ["/etc/apt/sources.list.d/test.list"]
return []
monkeypatch.setattr(h, "_iter_matching_files", fake_iter_matching)
monkeypatch.setattr(sp, "iter_matching_files", fake_iter_matching)
# Provide file contents for the sources file.
real_open = open
@ -105,10 +107,10 @@ def test_iter_apt_capture_paths_includes_signed_by_keyring(monkeypatch):
# Easier: patch parse_apt_signed_by directly to avoid filesystem reads.
monkeypatch.setattr(
h, "_parse_apt_signed_by", lambda sfs: {"/usr/share/keyrings/ext.gpg"}
sp, "parse_apt_signed_by", lambda sfs: {"/usr/share/keyrings/ext.gpg"}
)
out = h._iter_apt_capture_paths()
out = sp.iter_apt_capture_paths()
paths = {p for p, _r in out}
reasons = {p: r for p, r in out}
assert "/etc/apt/apt.conf.d/00test" in paths
@ -138,19 +140,23 @@ def test_iter_dnf_capture_paths(monkeypatch):
return [("/etc/pki/rpm-gpg", [], ["RPM-GPG-KEY"])]
return []
monkeypatch.setattr(h.os.path, "isdir", isdir)
monkeypatch.setattr(h.os, "walk", walk)
monkeypatch.setattr(h.os.path, "islink", lambda p: False)
monkeypatch.setattr(h.os.path, "isfile", lambda p: files.get(p) == "file")
monkeypatch.setattr(
h,
"_iter_matching_files",
lambda spec, cap=10000: (
["/etc/yum.repos.d/test.repo"] if spec.endswith("*.repo") else []
),
)
monkeypatch.setattr(sp.os.path, "isdir", isdir)
monkeypatch.setattr(sp.os, "walk", walk)
monkeypatch.setattr(sp.os.path, "islink", lambda p: False)
monkeypatch.setattr(sp.os.path, "isfile", lambda p: files.get(p) == "file")
out = h._iter_dnf_capture_paths()
def fake_iter_matching(spec: str, cap: int = 10000):
if spec == "/etc/yum.conf":
return ["/etc/yum.conf"]
if spec.endswith("*.repo"):
return ["/etc/yum.repos.d/test.repo"]
if spec == "/etc/pki/rpm-gpg/*":
return ["/etc/pki/rpm-gpg/RPM-GPG-KEY"]
return []
monkeypatch.setattr(sp, "iter_matching_files", fake_iter_matching)
out = sp.iter_dnf_capture_paths()
paths = {p for p, _r in out}
assert "/etc/dnf/dnf.conf" in paths
assert "/etc/yum/yum.conf" in paths
@ -160,13 +166,13 @@ def test_iter_dnf_capture_paths(monkeypatch):
def test_iter_system_capture_paths_dedupes_first_reason(monkeypatch):
monkeypatch.setattr(h, "_SYSTEM_CAPTURE_GLOBS", [("/a", "r1"), ("/b", "r2")])
monkeypatch.setattr(sp, "_SYSTEM_CAPTURE_GLOBS", [("/a", "r1"), ("/b", "r2")])
monkeypatch.setattr(
h,
"_iter_matching_files",
sp,
"iter_matching_files",
lambda spec, cap=10000: ["/dup"] if spec in {"/a", "/b"} else [],
)
out = h._iter_system_capture_paths()
out = sp.iter_system_capture_paths()
assert out == [("/dup", "r1")]
@ -289,20 +295,16 @@ def test_collect_firewall_runtime_snapshot_is_per_family_fallback(
def test_package_role_names_do_not_collide_with_singleton_roles():
from enroll.harvest import _role_name_from_pkg
assert _role_name_from_pkg("flatpak") == "package_flatpak"
assert _role_name_from_pkg("snap") == "package_snap"
assert _role_name_from_pkg("users") == "package_users"
assert _role_name_from_pkg("nginx") == "nginx"
assert role_name_from_pkg("flatpak") == "package_flatpak"
assert role_name_from_pkg("snap") == "package_snap"
assert role_name_from_pkg("users") == "package_users"
assert role_name_from_pkg("nginx") == "nginx"
def test_service_role_names_do_not_collide_with_singleton_roles():
from enroll.harvest import _role_name_from_unit
assert _role_name_from_unit("flatpak.service") == "service_flatpak"
assert _role_name_from_unit("users.service") == "service_users"
assert _role_name_from_unit("nginx.service") == "nginx"
assert role_name_from_unit("flatpak.service") == "service_flatpak"
assert role_name_from_unit("users.service") == "service_users"
assert role_name_from_unit("nginx.service") == "nginx"
def test_parse_sysctl_a_output_keeps_persistable_values(monkeypatch):

View file

@ -2,6 +2,8 @@ import json
from pathlib import Path
import enroll.harvest as h
import enroll.harvest_collectors.services as services
import enroll.capture as capture
from enroll.platform import PlatformInfo
from enroll.systemd import UnitInfo
@ -78,7 +80,7 @@ def _base_monkeypatches(monkeypatch, *, unit: str):
# Avoid walking the real filesystem.
monkeypatch.setattr(h.os, "walk", lambda root: iter(()))
monkeypatch.setattr(h, "_copy_into_bundle", lambda *a, **k: None)
monkeypatch.setattr(capture, "copy_into_bundle", lambda *a, **k: None)
# Default to a "no files exist" view of the world unless a test overrides.
monkeypatch.setattr(h.os.path, "isfile", lambda p: False)
@ -119,7 +121,7 @@ def test_harvest_captures_nginx_enabled_symlinks(monkeypatch, tmp_path: Path):
return ["/etc/nginx/modules-enabled/mod-http"]
return []
monkeypatch.setattr(h.glob, "glob", fake_glob)
monkeypatch.setattr(services.glob, "glob", fake_glob)
state_path = h.harvest(str(bundle), policy=AllowAllPolicy())
st = json.loads(Path(state_path).read_text(encoding="utf-8"))
@ -158,7 +160,7 @@ def test_harvest_does_not_capture_enabled_symlinks_without_role(
},
)
monkeypatch.setattr(
h.glob, "glob", lambda pat: ["/etc/nginx/sites-enabled/default"]
services.glob, "glob", lambda pat: ["/etc/nginx/sites-enabled/default"]
)
monkeypatch.setattr(h.os.path, "islink", lambda p: True)
monkeypatch.setattr(h.os, "readlink", lambda p: "../sites-available/default")
@ -186,7 +188,7 @@ def test_harvest_symlink_capture_respects_ignore_policy(monkeypatch, tmp_path: P
monkeypatch.setattr(h.os.path, "islink", lambda p: p in links)
monkeypatch.setattr(h.os, "readlink", lambda p: links[p])
monkeypatch.setattr(
h.glob,
services.glob,
"glob",
lambda pat: (
sorted(list(links.keys())) if pat == "/etc/nginx/sites-enabled/*" else []
@ -251,7 +253,7 @@ def test_harvest_captures_apache2_enabled_symlinks(monkeypatch, tmp_path: Path):
return ["/etc/apache2/conf-enabled/security.conf"]
return []
monkeypatch.setattr(h.glob, "glob", fake_glob)
monkeypatch.setattr(services.glob, "glob", fake_glob)
state_path = h.harvest(str(bundle), policy=AllowAllPolicy())
st = json.loads(Path(state_path).read_text(encoding="utf-8"))

View file

@ -3,6 +3,8 @@ from __future__ import annotations
import json
from pathlib import Path
import yaml
from enroll import manifest
@ -160,36 +162,55 @@ def test_manifest_puppet_writes_control_repo_style_output(tmp_path: Path):
manifest.manifest(str(bundle), str(out), target="puppet", fqdn="test.example")
site_pp = (out / "manifests" / "site.pp").read_text(encoding="utf-8")
assert site_pp == (
"node 'test.example' {\n"
" include curl\n"
" include foo\n"
" include users\n"
" include sysctl\n"
"}\n"
assert "node 'test.example' {" in site_pp
assert "lookup('enroll::classes'" in site_pp
assert "$enroll_classes.each" in site_pp
assert "include $enroll_class" in site_pp
assert "node default {" in site_pp
assert (out / "hiera.yaml").exists()
node_data = yaml.safe_load(
(out / "data" / "nodes" / "test.example.yaml").read_text(encoding="utf-8")
)
assert node_data["enroll::classes"] == ["curl", "foo", "users", "sysctl"]
assert node_data["curl::packages"] == ["curl"]
assert node_data["foo::packages"] == ["foo"]
assert node_data["foo::files"]["/etc/foo/foo.conf"]["source"] == (
"puppet:///modules/foo/nodes/test.example/etc/foo.conf"
)
assert node_data["foo::services"]["foo.service"] == {
"ensure": "running",
"enable": True,
}
assert node_data["users::users"]["alice"]["comment"] == "Alice Example"
assert node_data["users::users"]["alice"]["groups"] == ["docker"]
assert node_data["sysctl::files"]["/etc/sysctl.d/99-enroll.conf"]["source"] == (
"puppet:///modules/sysctl/nodes/test.example/sysctl/99-enroll.conf"
)
curl_pp = (out / "modules" / "curl" / "manifests" / "init.pp").read_text(
encoding="utf-8"
)
assert "class curl" in curl_pp
assert "package { 'curl':" in curl_pp
assert "Array[String] $packages = []" in curl_pp
assert "package { $package_name:" in curl_pp
assert "package { 'curl':" not in curl_pp
foo_pp = (out / "modules" / "foo" / "manifests" / "init.pp").read_text(
encoding="utf-8"
)
assert "class foo" in foo_pp
assert "package { 'foo':" in foo_pp
assert "file { '/etc/foo/foo.conf':" in foo_pp
assert "source => 'puppet:///modules/foo/etc/foo.conf'" in foo_pp
assert "service { 'foo.service':" in foo_pp
assert "Hash[String, Hash] $files = {}" in foo_pp
assert "* => $attrs" in foo_pp
assert "package { 'foo':" not in foo_pp
assert "file { '/etc/foo/foo.conf':" not in foo_pp
users_pp = (out / "modules" / "users" / "manifests" / "init.pp").read_text(
encoding="utf-8"
)
assert "class users" in users_pp
assert "group { 'docker':" in users_pp
assert "user { 'alice':" in users_pp
assert "Hash[String, Hash] $users = {}" in users_pp
assert "user { 'alice':" not in users_pp
sysctl_pp = (out / "modules" / "sysctl" / "manifests" / "init.pp").read_text(
encoding="utf-8"
@ -198,11 +219,162 @@ def test_manifest_puppet_writes_control_repo_style_output(tmp_path: Path):
assert "Boolean $sysctl_apply = true" in sysctl_pp
assert "Boolean $sysctl_ignore_apply_errors = true" in sysctl_pp
assert "exec { 'enroll-apply-sysctl':" in sysctl_pp
assert "command => $sysctl_ignore_apply_errors ? {" in sysctl_pp
assert "sysctl -e -p /etc/sysctl.d/99-enroll.conf || true" in sysctl_pp
assert "$files.has_key('/etc/sysctl.d/99-enroll.conf')" in sysctl_pp
assert (out / "modules" / "foo" / "files" / "etc" / "foo.conf").exists()
assert (out / "modules" / "sysctl" / "files" / "sysctl" / "99-enroll.conf").exists()
assert (
out
/ "modules"
/ "foo"
/ "files"
/ "nodes"
/ "test.example"
/ "etc"
/ "foo.conf"
).exists()
assert (
out
/ "modules"
/ "sysctl"
/ "files"
/ "nodes"
/ "test.example"
/ "sysctl"
/ "99-enroll.conf"
).exists()
def test_manifest_puppet_fqdn_mode_can_accumulate_separate_node_data(
    tmp_path: Path,
):
    """Two fqdn-mode Puppet runs into one output tree keep per-node data apart.

    Builds two bundles that differ only in hostname and in the content of
    ``etc/foo.conf``, renders each into the same output directory under its
    own fqdn, and then checks that every node has its own data YAML file, its
    own ``node`` block in ``site.pp``, and its own copy of the artifact under
    ``modules/foo/files/nodes/<fqdn>/``.
    """
    out = tmp_path / "puppet"
    fqdns = ("first.example", "second.example")

    def bare_role(role_name: str, **leading) -> dict:
        # Singleton role with no managed content; ``leading`` keys are placed
        # right after ``role_name`` so the key order matches a hand-written
        # literal.
        return {
            "role_name": role_name,
            **leading,
            "managed_dirs": [],
            "managed_files": [],
        }

    def make_bundle(host: str, conf_text: str) -> Path:
        # Lay down the single harvested artifact for the ``foo`` role.
        bundle = tmp_path / host
        artifact = bundle / "artifacts" / "foo" / "etc" / "foo.conf"
        artifact.parent.mkdir(parents=True, exist_ok=True)
        artifact.write_text(conf_text, encoding="utf-8")

        foo_service = {
            "unit": "foo.service",
            "role_name": "foo",
            "packages": ["foo"],
            "active_state": "active",
            "unit_file_state": "enabled",
            "managed_dirs": [],
            "managed_files": [
                {
                    "path": "/etc/foo/foo.conf",
                    "src_rel": "etc/foo.conf",
                    "owner": "root",
                    "group": "root",
                    "mode": "0644",
                }
            ],
            "managed_links": [],
        }
        roles = {
            "services": [foo_service],
            "packages": [],
            "users": bare_role("users", users=[]),
            "apt_config": bare_role("apt_config"),
            "dnf_config": bare_role("dnf_config"),
            "sysctl": bare_role("sysctl"),
            "firewall_runtime": {
                "role_name": "firewall_runtime",
                "packages": [],
            },
            "etc_custom": bare_role("etc_custom"),
            "usr_local_custom": bare_role("usr_local_custom"),
            "extra_paths": {**bare_role("extra_paths"), "managed_links": []},
        }
        _write_state(
            bundle,
            {
                "schema_version": 3,
                "host": {"hostname": host, "os": "debian", "pkg_backend": "dpkg"},
                "inventory": {"packages": {}},
                "roles": roles,
            },
        )
        return bundle

    # Both bundles are written before either manifest run.
    first = make_bundle("first", "first = true\n")
    second = make_bundle("second", "second = true\n")
    for bundle, fqdn in zip((first, second), fqdns):
        manifest.manifest(str(bundle), str(out), target="puppet", fqdn=fqdn)

    # Each run must leave its own node data file behind.
    nodes_dir = out / "data" / "nodes"
    for fqdn in fqdns:
        assert (nodes_dir / f"{fqdn}.yaml").exists()

    # site.pp accumulates one node block per fqdn.
    site_pp = (out / "manifests" / "site.pp").read_text(encoding="utf-8")
    for fqdn in fqdns:
        assert f"node '{fqdn}' {{" in site_pp

    # The second run must not clobber the first node's artifact copy.
    node_files_root = out / "modules" / "foo" / "files" / "nodes"
    expected_conf = {
        "first.example": "first = true\n",
        "second.example": "second = true\n",
    }
    artifacts = {
        fqdn: node_files_root / fqdn / "etc" / "foo.conf" for fqdn in expected_conf
    }
    for fqdn, conf_text in expected_conf.items():
        assert artifacts[fqdn].read_text(encoding="utf-8") == conf_text

    # Each node's data points at its own namespaced file source.
    node_data = {
        fqdn: yaml.safe_load(
            (nodes_dir / f"{fqdn}.yaml").read_text(encoding="utf-8")
        )
        for fqdn in fqdns
    }
    for fqdn, data in node_data.items():
        assert data["foo::files"]["/etc/foo/foo.conf"]["source"] == (
            f"puppet:///modules/foo/nodes/{fqdn}/etc/foo.conf"
        )
def test_manifest_puppet_uses_default_node_and_common_package_modules(tmp_path: Path):