From 824010b2ab15865b0c1845d8cc9e67a80c7accf2 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 3 Jan 2026 11:39:57 +1100 Subject: [PATCH] Several bug fixes and prep for 0.2.2 - Fix stat() of parent directory so that we set directory perms correct on --include paths. - Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty --- CHANGELOG.md | 5 ++ README.md | 2 +- debian/changelog | 7 ++ enroll/harvest.py | 171 +++++++++++++++++++++++++++++++++++++++------ enroll/manifest.py | 68 ++++++++++++++---- enroll/remote.py | 34 +++++---- pyproject.toml | 2 +- release.sh | 16 ++--- rpm/enroll.spec | 5 +- 9 files changed, 249 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c41210..0740cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.2.2 + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. + * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/README.md b/README.md index f4920b5..e399633 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ sudo rpm --import https://mig5.net/static/mig5.asc sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' [mig5] name=mig5 Repository -baseurl=https://rpm.mig5.net/rpm/$basearch +baseurl=https://rpm.mig5.net/rpm/$releasever/$basearch enabled=1 gpgcheck=1 repo_gpgcheck=1 diff --git a/debian/changelog b/debian/changelog index dbc7548..8c2f4b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.2.2) unstable; urgency=medium + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. + * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + + -- Miguel Jacq Sat, 02 Jan 2026 09:56:00 +1100 + enroll (0.2.1) unstable; urgency=medium * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/enroll/harvest.py b/enroll/harvest.py index 98e1404..7aba7c6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -6,7 +6,7 @@ import os import re import shutil import time -from dataclasses import dataclass, asdict +from dataclasses import dataclass, asdict, field from typing import Dict, List, Optional, Set from .systemd import ( @@ -58,59 +58,66 @@ class ServiceSnapshot: sub_state: Optional[str] unit_file_state: Optional[str] condition_result: Optional[str] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class PackageSnapshot: package: str role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsersSnapshot: role_name: str users: List[dict] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class AptConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class DnfConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class EtcCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsrLocalCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass @@ -149,6 +156,71 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 + +def _merge_parent_dirs( + existing_dirs: List[ManagedDir], + managed_files: List[ManagedFile], + *, + policy: IgnorePolicy, +) -> List[ManagedDir]: + """Ensure parent directories for managed_files are present in managed_dirs. + + This is used so the Ansible manifest can create destination directories with + explicit owner/group/mode (ansible-lint friendly) without needing a separate + "mkdir without perms" task. + + We only add the immediate parent directory for each managed file. For + explicit directory includes (extra_paths), existing_dirs will already + contain the walked directory tree. + """ + by_path: Dict[str, ManagedDir] = { + d.path: d for d in (existing_dirs or []) if d.path + } + + for mf in managed_files or []: + p = str(mf.path or "").rstrip("/") + if not p: + continue + dpath = os.path.dirname(p) + if not dpath or dpath == "/": + continue + if dpath in by_path: + continue + + # Directory-deny logic: newer IgnorePolicy implementations provide + # deny_reason_dir(). Older/simple policies (including unit tests) may + # only implement deny_reason(), which is file-oriented and may return + # "not_regular_file" for directories. + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dpath) + else: + deny = policy.deny_reason(dpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None + if deny: + # If the file itself was captured, its parent directory is likely safe, + # but still respect deny globs for directories to avoid managing + # sensitive/forbidden trees. + continue + + try: + owner, group, mode = stat_triplet(dpath) + except OSError: + continue + + by_path[dpath] = ManagedDir( + path=dpath, + owner=owner, + group=group, + mode=mode, + reason="parent_of_managed_file", + ) + + return [by_path[k] for k in sorted(by_path)] + + # Directories that are shared across many packages. # Never attribute all unowned files in these trees # to one single package. @@ -1521,7 +1593,14 @@ def harvest( continue if dirpath not in extra_dir_seen: - deny = policy.deny_reason_dir(dirpath) + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dirpath) + else: + deny = policy.deny_reason(dirpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None if not deny: try: owner, group, mode = stat_triplet(dirpath) @@ -1661,6 +1740,52 @@ def harvest( "roles": roles, } + # Ensure every role has explicit managed_dirs for parent directories of managed files. + # This lets the manifest create directories with owner/group/mode (ansible-lint friendly) + # without a separate "mkdir without perms" task. + users_snapshot.managed_dirs = _merge_parent_dirs( + users_snapshot.managed_dirs, users_snapshot.managed_files, policy=policy + ) + for s in service_snaps: + s.managed_dirs = _merge_parent_dirs( + s.managed_dirs, s.managed_files, policy=policy + ) + for p in pkg_snaps: + p.managed_dirs = _merge_parent_dirs( + p.managed_dirs, p.managed_files, policy=policy + ) + + if apt_config_snapshot: + apt_config_snapshot.managed_dirs = _merge_parent_dirs( + apt_config_snapshot.managed_dirs, + apt_config_snapshot.managed_files, + policy=policy, + ) + if dnf_config_snapshot: + dnf_config_snapshot.managed_dirs = _merge_parent_dirs( + dnf_config_snapshot.managed_dirs, + dnf_config_snapshot.managed_files, + policy=policy, + ) + if etc_custom_snapshot: + etc_custom_snapshot.managed_dirs = _merge_parent_dirs( + etc_custom_snapshot.managed_dirs, + etc_custom_snapshot.managed_files, + policy=policy, + ) + if usr_local_custom_snapshot: + usr_local_custom_snapshot.managed_dirs = _merge_parent_dirs( + usr_local_custom_snapshot.managed_dirs, + usr_local_custom_snapshot.managed_files, + policy=policy, + ) + if extra_paths_snapshot: + extra_paths_snapshot.managed_dirs = _merge_parent_dirs( + extra_paths_snapshot.managed_dirs, + extra_paths_snapshot.managed_files, + policy=policy, + ) + state = { "enroll": { "version": get_enroll_version(), diff --git a/enroll/manifest.py b/enroll/manifest.py index a373773..f30e5f3 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -422,12 +422,6 @@ def _render_generic_files_tasks( mode: "{{{{ item.mode }}}}" loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" -- name: Ensure destination directories exist - ansible.builtin.file: - path: "{{{{ item.dest | dirname }}}}" - state: directory - loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" - - name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" @@ -983,6 +977,7 @@ Generated non-system user accounts and SSH public material. var_prefix = role managed_files = apt_config_snapshot.get("managed_files", []) + managed_dirs = apt_config_snapshot.get("managed_dirs", []) or [] excluded = apt_config_snapshot.get("excluded", []) notes = apt_config_snapshot.get("notes", []) @@ -1019,12 +1014,20 @@ Generated non-system user accounts and SSH public material. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1134,6 +1137,7 @@ APT configuration harvested from the system (sources, pinning, and keyrings). var_prefix = role managed_files = dnf_config_snapshot.get("managed_files", []) + managed_dirs = dnf_config_snapshot.get("managed_dirs", []) or [] excluded = dnf_config_snapshot.get("excluded", []) notes = dnf_config_snapshot.get("notes", []) @@ -1169,12 +1173,20 @@ APT configuration harvested from the system (sources, pinning, and keyrings). notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1285,6 +1297,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP var_prefix = role managed_files = etc_custom_snapshot.get("managed_files", []) + managed_dirs = etc_custom_snapshot.get("managed_dirs", []) or [] excluded = etc_custom_snapshot.get("excluded", []) notes = etc_custom_snapshot.get("notes", []) @@ -1321,12 +1334,20 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1395,6 +1416,7 @@ Unowned /etc config files not attributed to packages or services. var_prefix = role managed_files = usr_local_custom_snapshot.get("managed_files", []) + managed_dirs = usr_local_custom_snapshot.get("managed_dirs", []) or [] excluded = usr_local_custom_snapshot.get("excluded", []) notes = usr_local_custom_snapshot.get("notes", []) @@ -1431,12 +1453,20 @@ Unowned /etc config files not attributed to packages or services. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1616,6 +1646,7 @@ User-requested extra file harvesting. unit = svc["unit"] pkgs = svc.get("packages", []) or [] managed_files = svc.get("managed_files", []) or [] + managed_dirs = svc.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1660,11 +1691,14 @@ User-requested extra file harvesting. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, f"{var_prefix}_manage_unit": True, f"{var_prefix}_systemd_enabled": bool(enabled_at_harvest), f"{var_prefix}_systemd_state": desired_state, @@ -1679,6 +1713,7 @@ User-requested extra file harvesting. f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], f"{var_prefix}_manage_unit": False, f"{var_prefix}_systemd_enabled": False, f"{var_prefix}_systemd_state": "stopped", @@ -1782,6 +1817,7 @@ Generated from `{unit}`. role = pr["role_name"] pkg = pr.get("package") or "" managed_files = pr.get("managed_files", []) or [] + managed_dirs = pr.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1823,10 +1859,13 @@ Generated from `{unit}`. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, } base_vars = _merge_mappings_overwrite(base_vars, jt_map) @@ -1836,6 +1875,7 @@ Generated from `{unit}`. { f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], }, ) _write_hostvars(out_dir, fqdn or "", role, base_vars) diff --git a/enroll/remote.py b/enroll/remote.py index 9618512..b86cd08 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -16,7 +16,6 @@ def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None: Protects against path traversal (e.g. entries containing ../). """ - # Note: tar member names use POSIX separators regardless of platform. dest = dest.resolve() @@ -80,9 +79,18 @@ def _build_enroll_pyz(tmpdir: Path) -> Path: return pyz_path -def _ssh_run(ssh, cmd: str) -> tuple[int, str, str]: - """Run a command over a Paramiko SSHClient.""" - _stdin, stdout, stderr = ssh.exec_command(cmd) +def _ssh_run(ssh, cmd: str, *, get_pty: bool = False) -> tuple[int, str, str]: + """Run a command over a Paramiko SSHClient. + + Paramiko's exec_command runs commands without a TTY by default. + Some hosts have sudoers "requiretty" enabled, which causes sudo to + fail even when passwordless sudo is configured. For those commands, + request a PTY. + + We do not request a PTY for commands that stream binary data + (e.g. tar/gzip output), as a PTY can corrupt the byte stream. + """ + _stdin, stdout, stderr = ssh.exec_command(cmd, get_pty=get_pty) out = stdout.read().decode("utf-8", errors="replace") err = stderr.read().decode("utf-8", errors="replace") rc = stdout.channel.recv_exit_status() @@ -105,7 +113,6 @@ def remote_harvest( Returns the local path to state.json inside local_out_dir. """ - try: import paramiko # type: ignore except Exception as e: @@ -182,34 +189,35 @@ def remote_harvest( for p in exclude_paths or []: argv.extend(["--exclude-path", str(p)]) - _cmd = " ".join(shlex.quote(a) for a in argv) - if not no_sudo: - cmd = f"sudo {_cmd}" - else: - cmd = _cmd - rc, out, err = _ssh_run(ssh, cmd) + _cmd = " ".join(map(shlex.quote, argv)) + cmd = f"sudo {_cmd}" if not no_sudo else _cmd + + # PTY for sudo commands (helps sudoers requiretty). + rc, out, err = _ssh_run(ssh, cmd, get_pty=(not no_sudo)) if rc != 0: raise RuntimeError( "Remote harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) if not no_sudo: - # Ensure user can read the files, before we tar it + # Ensure user can read the files, before we tar it. if not resolved_user: raise RuntimeError( "Unable to determine remote username for chown. " "Pass --remote-user explicitly or use --no-sudo." ) cmd = f"sudo chown -R {resolved_user} {rbundle}" - rc, out, err = _ssh_run(ssh, cmd) + rc, out, err = _ssh_run(ssh, cmd, get_pty=True) if rc != 0: raise RuntimeError( "chown of harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) diff --git a/pyproject.toml b/pyproject.toml index 34f411e..72dd732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.1" +version = "0.2.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/release.sh b/release.sh index 0a052c7..db3f27b 100755 --- a/release.sh +++ b/release.sh @@ -44,14 +44,11 @@ for dist in ${DISTS[@]}; do done # RPM -REPO_ROOT="${HOME}/git/repo_rpm" -RPM_REPO="${REPO_ROOT}/rpm/x86_64" -BUILD_OUTPUT="${HOME}/git/enroll/dist" -REMOTE="letessier.mig5.net:/opt/repo_rpm" -KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" - -mkdir -p "$RPM_REPO" sudo apt-get -y install createrepo-c rpm +BUILD_OUTPUT="${HOME}/git/enroll/dist" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" +REPO_ROOT="${HOME}/git/repo_rpm" +REMOTE="letessier.mig5.net:/opt/repo_rpm" DISTS=( fedora:43 @@ -60,6 +57,10 @@ DISTS=( for dist in ${DISTS[@]}; do release=$(echo ${dist} | cut -d: -f2) + REPO_RELEASE_ROOT="${REPO_ROOT}/fc${release}" + RPM_REPO="${REPO_RELEASE_ROOT}/rpm/x86_64" + mkdir -p "$RPM_REPO" + docker build \ --no-cache \ -f Dockerfile.rpmbuild \ @@ -71,7 +72,6 @@ for dist in ${DISTS[@]}; do docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} sudo chown -R "${USER}" "$PWD/dist" - echo "==> Updating RPM repo..." for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" done diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 8fc8cac..12286fa 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.2.1 +%global upstream_version 0.2.2 Name: enroll Version: %{upstream_version} @@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sat Jan 02 2026 Miguel Jacq - %{version}-%{release} +- Fix stat() of parent directory so that we set directory perms correct on --include paths. +- Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty * Fri Jan 01 2026 Miguel Jacq - %{version}-%{release} - Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook - Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files