From 706604df74204ce291e5a4f0b2a240de9063a79a Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 22 Jun 2026 09:55:38 +1000 Subject: [PATCH] Stricter validation of harvests to ensure that they meet the schema and don't contain unsafe artifacts (e.g symlinks pointing outside the artifact tree) --- CHANGELOG.md | 2 + enroll/ansible.py | 47 ++++++----- enroll/manifest_safety.py | 170 ++++++++++++++++++++++++++++++++++++++ enroll/puppet.py | 17 ++-- enroll/salt.py | 18 ++-- enroll/validate.py | 115 ++++++++++++++++++-------- 6 files changed, 295 insertions(+), 74 deletions(-) create mode 100644 enroll/manifest_safety.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d6213bc..2195732 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * BREAKING CHANGE: Group all package and systemd-unit roles into Debian Section/RPM Group roles by default, including managed config files and unit state. This mode is not used if `--fqdn` or `--no-common-roles` is set, in which case, the traditional behaviour of preserving one role per package/unit is used instead. * BREAKING CHANGE: Only capture user-specific .bashrc style files when using `--dangerous` mode, in case they contain sensitive env vars. + * BREAKING CHANGE: Don't allow reading `.enroll.ini` in the CWD. Use only the ENROLL_CONFIG env var, an explicit `--config` path or else the XDG default location (or `~/.config/enroll/enroll.ini` if `XDG_CONFIG_HOME` is not set). * Detect active sysctl parameters and write them to a `/etc/sysctl.d/99-enroll.conf` file * Use `no_log` on systemd unit interrogations to suppress potential sensitive output when applying Ansible * Support manifesting Puppet code, as well as Ansible! @@ -10,6 +11,7 @@ * A lot of under-the-bonnet refactoring to make it easier to extend to cover other config managers (that don't suck) in future. * Support for detecting Docker and Podman images and enforcing their presence (by SHA256 hash). * Add support for detecting Flatpaks and Snaps. + * Stricter validation of harvests to ensure that they meet the schema and don't contain unsafe artifacts (e.g symlinks pointing outside the artifact tree) # 0.6.0 diff --git a/enroll/ansible.py b/enroll/ansible.py index 9794de3..e0fcd0c 100644 --- a/enroll/ansible.py +++ b/enroll/ansible.py @@ -2,7 +2,6 @@ from __future__ import annotations import os import re -import shutil import stat import tempfile from dataclasses import dataclass @@ -14,6 +13,11 @@ from .jinjaturtle import ( jinjify_managed_files as _jinjify_managed_files, resolve_jinjaturtle_mode, ) +from .manifest_safety import ( + copy_safe_artifact_file, + iter_safe_artifact_files, + prepare_manifest_output_dir, +) from .role_names import avoid_reserved_role_name from .state import inventory_packages_from_state, roles_from_state from .yamlutil import yaml_dump_mapping, yaml_load_mapping @@ -408,7 +412,8 @@ def _prepare_ansible_context( site_mode = fqdn is not None and fqdn != "" jt_exe, jt_enabled = resolve_jinjaturtle_mode(jinjaturtle) - os.makedirs(out_dir, exist_ok=True) + out = prepare_manifest_output_dir(out_dir, allow_existing=site_mode) + out_dir = str(out) roles_root = os.path.join(out_dir, "roles") os.makedirs(roles_root, exist_ok=True) @@ -445,7 +450,7 @@ def _copy2_replace(src: str, dst: str) -> None: fd, tmp = tempfile.mkstemp(prefix=".enroll-tmp-", dir=dst_dir) os.close(fd) try: - shutil.copy2(src, tmp) + copy_safe_artifact_file(src, tmp) # Ensure the working tree stays mergeable: make the file user-writable. st = os.stat(tmp, follow_symlinks=False) @@ -475,29 +480,23 @@ def _copy_artifacts( In --fqdn site mode, this is usually: inventory/host_vars///.files """ - artifacts_dir = os.path.join(bundle_dir, "artifacts", role) - if not os.path.isdir(artifacts_dir): - return - for root, _, files in os.walk(artifacts_dir): - for fn in files: - src = os.path.join(root, fn) - rel = os.path.relpath(src, artifacts_dir) - dst = os.path.join(dst_files_dir, rel) + for src, rel in iter_safe_artifact_files(bundle_dir, role): + dst = os.path.join(dst_files_dir, rel) - # If a file was successfully templatised by JinjaTurtle, do NOT - # also materialise the raw copy in the destination files dir. - if exclude_rels and rel in exclude_rels: - try: - if os.path.isfile(dst): - os.remove(dst) - except Exception: - pass # nosec - continue + # If a file was successfully templatised by JinjaTurtle, do NOT + # also materialise the raw copy in the destination files dir. + if exclude_rels and rel in exclude_rels: + try: + if os.path.isfile(dst): + os.remove(dst) + except Exception: + pass # nosec + continue - if preserve_existing and os.path.exists(dst): - continue - os.makedirs(os.path.dirname(dst), exist_ok=True) - _copy2_replace(src, dst) + if preserve_existing and os.path.exists(dst): + continue + os.makedirs(os.path.dirname(dst), exist_ok=True) + _copy2_replace(str(src), dst) def _write_role_scaffold(role_dir: str) -> None: diff --git a/enroll/manifest_safety.py b/enroll/manifest_safety.py new file mode 100644 index 0000000..77baafc --- /dev/null +++ b/enroll/manifest_safety.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import os +import shutil +import stat +from pathlib import Path +from typing import Iterator, Tuple + + +class ArtifactSafetyError(RuntimeError): + """Raised when a harvest artifact path is unsafe to consume.""" + + +class ManifestOutputError(RuntimeError): + """Raised when a manifest output path is unsafe to use.""" + + +def _safe_relative_path(value: str, *, field: str) -> Path: + text = str(value or "").strip() + if not text: + raise ArtifactSafetyError(f"empty {field}") + if "\x00" in text: + raise ArtifactSafetyError(f"{field} contains NUL byte: {text!r}") + p = Path(text) + if p.is_absolute(): + raise ArtifactSafetyError(f"{field} must be relative: {text!r}") + if any(part in {"", ".", ".."} for part in p.parts): + raise ArtifactSafetyError(f"{field} contains unsafe path component: {text!r}") + return p + + +def prepare_manifest_output_dir( + out_dir: str | Path, *, allow_existing: bool = False +) -> Path: + """Create a manifest output directory, refusing to overwrite anything. + + Rendering a manifest may be run by root and may target configuration- + management trees. Refuse an existing path rather than deleting or merging + with it by default; callers that intentionally support accumulation, such + as --fqdn site mode, may allow an existing directory but never a symlink or + non-directory path. + """ + + out = Path(out_dir).expanduser() + if os.path.lexists(out): + if not allow_existing: + raise ManifestOutputError( + "manifest output path already exists; refusing to overwrite: " f"{out}" + ) + st = out.lstat() + if stat.S_ISLNK(st.st_mode): + raise ManifestOutputError( + f"manifest output path is a symlink; refusing to use: {out}" + ) + if not out.is_dir(): + raise ManifestOutputError( + f"manifest output path exists but is not a directory: {out}" + ) + return out + out.mkdir(parents=True, exist_ok=False) + return out + + +def _assert_no_symlink_components(path: Path, *, root: Path) -> None: + """Reject symlinks in any existing path component between root and path.""" + + try: + rel = path.relative_to(root) + except ValueError as e: + raise ArtifactSafetyError(f"artifact path escapes artifact root: {path}") from e + + cur = root + for part in rel.parts: + cur = cur / part + try: + st = cur.lstat() + except FileNotFoundError: + # Missing components are handled by the final caller where relevant. + return + if stat.S_ISLNK(st.st_mode): + raise ArtifactSafetyError(f"artifact path contains symlink: {cur}") + + +def safe_artifact_file(bundle_dir: str | Path, role: str, src_rel: str) -> Path: + """Return a harvested artifact file path only if it is safe to copy. + + The path must remain under artifacts/, contain no absolute or '..' + components, contain no symlinks in any path component, and refer to a + regular, non-hardlinked file. This deliberately mirrors the tar extraction + hardening used for remote/SOPS/plain tarball bundles, but applies it to + directory bundles too. + """ + + role_path = _safe_relative_path(role, field="artifact role") + src_path = _safe_relative_path(src_rel, field="artifact src_rel") + + artifacts_root = Path(bundle_dir).expanduser() / "artifacts" + root = artifacts_root / role_path + candidate = root / src_path + + if artifacts_root.exists(): + st = artifacts_root.lstat() + if stat.S_ISLNK(st.st_mode): + raise ArtifactSafetyError( + f"artifacts directory is a symlink: {artifacts_root}" + ) + + if root.exists(): + _assert_no_symlink_components(root, root=artifacts_root) + + _assert_no_symlink_components(candidate, root=artifacts_root) + + try: + st = candidate.lstat() + except FileNotFoundError: + raise + + if stat.S_ISLNK(st.st_mode): + raise ArtifactSafetyError(f"artifact is a symlink: {candidate}") + if not stat.S_ISREG(st.st_mode): + raise ArtifactSafetyError(f"artifact is not a regular file: {candidate}") + if st.st_nlink > 1: + raise ArtifactSafetyError(f"artifact is hardlinked: {candidate}") + + resolved_root = artifacts_root.resolve(strict=True) + resolved_candidate = candidate.resolve(strict=True) + try: + resolved_candidate.relative_to(resolved_root) + except ValueError as e: + raise ArtifactSafetyError( + f"artifact path escapes artifact root: {candidate}" + ) from e + + return candidate + + +def iter_safe_artifact_files( + bundle_dir: str | Path, role: str +) -> Iterator[Tuple[Path, str]]: + """Yield safe artifact files for a role as (path, src_rel).""" + + role_path = _safe_relative_path(role, field="artifact role") + artifacts_dir = Path(bundle_dir).expanduser() / "artifacts" / role_path + if not artifacts_dir.exists(): + return + if not artifacts_dir.is_dir(): + raise ArtifactSafetyError( + f"artifact role path is not a directory: {artifacts_dir}" + ) + + for root, dirs, files in os.walk(artifacts_dir, followlinks=False): + root_p = Path(root) + for dirname in list(dirs): + p = root_p / dirname + try: + st = p.lstat() + except FileNotFoundError: + continue + if stat.S_ISLNK(st.st_mode): + raise ArtifactSafetyError(f"artifact directory is a symlink: {p}") + for filename in files: + p = root_p / filename + rel = p.relative_to(artifacts_dir).as_posix() + yield safe_artifact_file(bundle_dir, role, rel), rel + + +def copy_safe_artifact_file(src: str | Path, dst: str | Path) -> None: + """Copy an already validated artifact file without following symlinks.""" + + shutil.copy2(src, dst, follow_symlinks=False) diff --git a/enroll/puppet.py b/enroll/puppet.py index 021ae5f..1b78789 100644 --- a/enroll/puppet.py +++ b/enroll/puppet.py @@ -4,7 +4,6 @@ import hashlib import json import re import shlex -import shutil from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Set, Tuple @@ -16,6 +15,11 @@ from .cm import ( role_order_key, markdown_list, ) +from .manifest_safety import ( + copy_safe_artifact_file, + prepare_manifest_output_dir, + safe_artifact_file, +) from .state import inventory_packages_from_state, roles_from_state from .jinjaturtle import ( can_jinjify_path, @@ -628,13 +632,14 @@ def _copy_artifact( ) -> Optional[str]: if not role or not src_rel: return None - src = Path(bundle_dir) / "artifacts" / role / src_rel - if not src.is_file(): + try: + src = safe_artifact_file(bundle_dir, role, src_rel) + except FileNotFoundError: return None module_rel = Path(dst_prefix or "") / src_rel dst = dst_files_dir / module_rel dst.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src, dst) + copy_safe_artifact_file(src, dst) return module_rel.as_posix() @@ -1712,10 +1717,8 @@ class PuppetManifestRenderer: no_common_roles = self.no_common_roles state = PuppetRole.load_state(bundle_dir) - out = Path(out_dir) hiera_mode = bool(fqdn) - if out.exists() and not hiera_mode: - shutil.rmtree(out) + out = prepare_manifest_output_dir(out_dir, allow_existing=hiera_mode) manifests_dir = out / "manifests" modules_dir = out / "modules" manifests_dir.mkdir(parents=True, exist_ok=True) diff --git a/enroll/salt.py b/enroll/salt.py index 1b0e043..27ec915 100644 --- a/enroll/salt.py +++ b/enroll/salt.py @@ -17,6 +17,11 @@ from .cm import ( markdown_list, ) from .jinjaturtle import jinjify_artifact, resolve_jinjaturtle_mode +from .manifest_safety import ( + copy_safe_artifact_file, + prepare_manifest_output_dir, + safe_artifact_file, +) from .state import inventory_packages_from_state, roles_from_state from .yamlutil import yaml_dump_mapping, yaml_load_mapping_file @@ -610,13 +615,14 @@ def _copy_artifact( ) -> Optional[str]: if not role or not src_rel: return None - src = Path(bundle_dir) / "artifacts" / role / src_rel - if not src.is_file(): + try: + src = safe_artifact_file(bundle_dir, role, src_rel) + except FileNotFoundError: return None role_rel = Path(dst_prefix or "") / src_rel dst = dst_files_dir / role_rel dst.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(src, dst) + copy_safe_artifact_file(src, dst) return role_rel.as_posix() @@ -1687,13 +1693,11 @@ class SaltManifestRenderer: def render(self) -> None: state = SaltRole.load_state(self.bundle_dir) - out = Path(self.out_dir) + fqdn_mode = bool(self.fqdn) + out = prepare_manifest_output_dir(self.out_dir, allow_existing=fqdn_mode) states_dir = out / "states" pillar_dir = out / "pillar" - fqdn_mode = bool(self.fqdn) - if out.exists() and not fqdn_mode: - shutil.rmtree(out) states_dir.mkdir(parents=True, exist_ok=True) if fqdn_mode: pillar_dir.mkdir(parents=True, exist_ok=True) diff --git a/enroll/validate.py b/enroll/validate.py index 0a3e8cb..60d7f7c 100644 --- a/enroll/validate.py +++ b/enroll/validate.py @@ -1,6 +1,8 @@ from __future__ import annotations import json +import os +import stat import urllib.request from dataclasses import dataclass from pathlib import Path @@ -9,6 +11,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple import jsonschema from .diff import BundleRef, _bundle_from_input +from .manifest_safety import ArtifactSafetyError, safe_artifact_file from .state import load_state @@ -171,7 +174,7 @@ def validate_harvest( except Exception as e: # noqa: BLE001 errors.append(f"failed to load/validate schema: {e!r}") - # Artifact existence checks + # Artifact existence and safety checks. artifacts_dir = bundle.dir / "artifacts" referenced: Set[Tuple[str, str]] = set() for role_name, mf in _iter_managed_files(state): @@ -188,15 +191,15 @@ def validate_harvest( continue referenced.add((role_name, src_rel)) - p = artifacts_dir / role_name / src_rel - if not p.exists(): + try: + safe_artifact_file(bundle.dir, role_name, src_rel) + except FileNotFoundError: errors.append( f"missing artifact for role {role_name}: artifacts/{role_name}/{src_rel}" ) - continue - if not p.is_file(): + except ArtifactSafetyError as e: errors.append( - f"artifact is not a file for role {role_name}: artifacts/{role_name}/{src_rel}" + f"unsafe artifact for role {role_name}: artifacts/{role_name}/{src_rel}: {e}" ) # Runtime firewall snapshots are generated artifacts rather than managed files. @@ -211,43 +214,83 @@ def validate_harvest( f"firewall_runtime {key} has suspicious src_rel: {src_rel!r}" ) continue - referenced.add( - (str(fw.get("role_name") or "firewall_runtime"), src_rel) - ) - p = ( - artifacts_dir - / str(fw.get("role_name") or "firewall_runtime") - / src_rel - ) - if not p.exists(): + role_name = str(fw.get("role_name") or "firewall_runtime") + referenced.add((role_name, src_rel)) + try: + safe_artifact_file(bundle.dir, role_name, src_rel) + except FileNotFoundError: errors.append( "missing firewall runtime artifact: " - f"artifacts/{fw.get('role_name') or 'firewall_runtime'}/{src_rel}" + f"artifacts/{role_name}/{src_rel}" ) - elif not p.is_file(): + except ArtifactSafetyError as e: errors.append( - "firewall runtime artifact is not a file: " - f"artifacts/{fw.get('role_name') or 'firewall_runtime'}/{src_rel}" + "unsafe firewall runtime artifact: " + f"artifacts/{role_name}/{src_rel}: {e}" ) - # Warn if there are extra files in artifacts not referenced. + # Validate the whole artifact tree too, so unreferenced symlinks, + # hardlinks, special files, and path-shaping tricks do not survive + # validation simply because no managed_file currently references them. if artifacts_dir.exists() and artifacts_dir.is_dir(): - for fp in artifacts_dir.rglob("*"): - if not fp.is_file(): - continue - try: - rel = fp.relative_to(artifacts_dir) - except ValueError: - continue - parts = rel.parts - if len(parts) < 2: - continue - role_name = parts[0] - src_rel = "/".join(parts[1:]) - if (role_name, src_rel) not in referenced: - warnings.append( - f"unreferenced artifact present: artifacts/{role_name}/{src_rel}" - ) + for root, dirs, files in os.walk(artifacts_dir, followlinks=False): + root_p = Path(root) + for name in list(dirs): + fp = root_p / name + try: + st = fp.lstat() + except FileNotFoundError: + continue + if stat.S_ISLNK(st.st_mode): + errors.append(f"artifact directory is a symlink: {fp}") + elif not stat.S_ISDIR(st.st_mode): + errors.append(f"artifact directory is not a directory: {fp}") + + for name in files: + fp = root_p / name + try: + st = fp.lstat() + except FileNotFoundError: + continue + try: + rel = fp.relative_to(artifacts_dir) + except ValueError: + errors.append(f"artifact escapes artifact root: {fp}") + continue + parts = rel.parts + if len(parts) < 2: + errors.append(f"artifact is not under a role directory: {fp}") + continue + role_name = parts[0] + src_rel = "/".join(parts[1:]) + + if stat.S_ISLNK(st.st_mode): + errors.append( + f"artifact is a symlink: artifacts/{role_name}/{src_rel}" + ) + continue + if not stat.S_ISREG(st.st_mode): + errors.append( + f"artifact is not a regular file: artifacts/{role_name}/{src_rel}" + ) + continue + if st.st_nlink > 1: + errors.append( + f"artifact is hardlinked: artifacts/{role_name}/{src_rel}" + ) + continue + try: + safe_artifact_file(bundle.dir, role_name, src_rel) + except (FileNotFoundError, ArtifactSafetyError) as e: + errors.append( + f"unsafe artifact: artifacts/{role_name}/{src_rel}: {e}" + ) + continue + + if (role_name, src_rel) not in referenced: + warnings.append( + f"unreferenced artifact present: artifacts/{role_name}/{src_rel}" + ) return ValidationResult(errors=errors, warnings=warnings) finally: