Stricter validation of harvests to ensure that they meet the schema and don't contain unsafe artifacts (e.g symlinks pointing outside the artifact tree)

This commit is contained in:
Miguel Jacq 2026-06-22 09:55:38 +10:00
parent a85e8265f4
commit 706604df74
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
6 changed files with 295 additions and 74 deletions

View file

@ -2,6 +2,7 @@
* BREAKING CHANGE: Group all package and systemd-unit roles into Debian Section/RPM Group roles by default, including managed config files and unit state. This mode is not used if `--fqdn` or `--no-common-roles` is set, in which case, the traditional behaviour of preserving one role per package/unit is used instead. * BREAKING CHANGE: Group all package and systemd-unit roles into Debian Section/RPM Group roles by default, including managed config files and unit state. This mode is not used if `--fqdn` or `--no-common-roles` is set, in which case, the traditional behaviour of preserving one role per package/unit is used instead.
* BREAKING CHANGE: Only capture user-specific .bashrc style files when using `--dangerous` mode, in case they contain sensitive env vars. * BREAKING CHANGE: Only capture user-specific .bashrc style files when using `--dangerous` mode, in case they contain sensitive env vars.
* BREAKING CHANGE: Don't allow reading `.enroll.ini` in the CWD. Use only the ENROLL_CONFIG env var, an explicit `--config` path or else the XDG default location (or `~/.config/enroll/enroll.ini` if `XDG_CONFIG_HOME` is not set).
* Detect active sysctl parameters and write them to a `/etc/sysctl.d/99-enroll.conf` file * Detect active sysctl parameters and write them to a `/etc/sysctl.d/99-enroll.conf` file
* Use `no_log` on systemd unit interrogations to suppress potential sensitive output when applying Ansible * Use `no_log` on systemd unit interrogations to suppress potential sensitive output when applying Ansible
* Support manifesting Puppet code, as well as Ansible! * Support manifesting Puppet code, as well as Ansible!
@ -10,6 +11,7 @@
* A lot of under-the-bonnet refactoring to make it easier to extend to cover other config managers (that don't suck) in future. * A lot of under-the-bonnet refactoring to make it easier to extend to cover other config managers (that don't suck) in future.
* Support for detecting Docker and Podman images and enforcing their presence (by SHA256 hash). * Support for detecting Docker and Podman images and enforcing their presence (by SHA256 hash).
* Add support for detecting Flatpaks and Snaps. * Add support for detecting Flatpaks and Snaps.
* Stricter validation of harvests to ensure that they meet the schema and don't contain unsafe artifacts (e.g symlinks pointing outside the artifact tree)
# 0.6.0 # 0.6.0

View file

@ -2,7 +2,6 @@ from __future__ import annotations
import os import os
import re import re
import shutil
import stat import stat
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
@ -14,6 +13,11 @@ from .jinjaturtle import (
jinjify_managed_files as _jinjify_managed_files, jinjify_managed_files as _jinjify_managed_files,
resolve_jinjaturtle_mode, resolve_jinjaturtle_mode,
) )
from .manifest_safety import (
copy_safe_artifact_file,
iter_safe_artifact_files,
prepare_manifest_output_dir,
)
from .role_names import avoid_reserved_role_name from .role_names import avoid_reserved_role_name
from .state import inventory_packages_from_state, roles_from_state from .state import inventory_packages_from_state, roles_from_state
from .yamlutil import yaml_dump_mapping, yaml_load_mapping from .yamlutil import yaml_dump_mapping, yaml_load_mapping
@ -408,7 +412,8 @@ def _prepare_ansible_context(
site_mode = fqdn is not None and fqdn != "" site_mode = fqdn is not None and fqdn != ""
jt_exe, jt_enabled = resolve_jinjaturtle_mode(jinjaturtle) jt_exe, jt_enabled = resolve_jinjaturtle_mode(jinjaturtle)
os.makedirs(out_dir, exist_ok=True) out = prepare_manifest_output_dir(out_dir, allow_existing=site_mode)
out_dir = str(out)
roles_root = os.path.join(out_dir, "roles") roles_root = os.path.join(out_dir, "roles")
os.makedirs(roles_root, exist_ok=True) os.makedirs(roles_root, exist_ok=True)
@ -445,7 +450,7 @@ def _copy2_replace(src: str, dst: str) -> None:
fd, tmp = tempfile.mkstemp(prefix=".enroll-tmp-", dir=dst_dir) fd, tmp = tempfile.mkstemp(prefix=".enroll-tmp-", dir=dst_dir)
os.close(fd) os.close(fd)
try: try:
shutil.copy2(src, tmp) copy_safe_artifact_file(src, tmp)
# Ensure the working tree stays mergeable: make the file user-writable. # Ensure the working tree stays mergeable: make the file user-writable.
st = os.stat(tmp, follow_symlinks=False) st = os.stat(tmp, follow_symlinks=False)
@ -475,29 +480,23 @@ def _copy_artifacts(
In --fqdn site mode, this is usually: In --fqdn site mode, this is usually:
inventory/host_vars/<fqdn>/<role>/.files inventory/host_vars/<fqdn>/<role>/.files
""" """
artifacts_dir = os.path.join(bundle_dir, "artifacts", role) for src, rel in iter_safe_artifact_files(bundle_dir, role):
if not os.path.isdir(artifacts_dir): dst = os.path.join(dst_files_dir, rel)
return
for root, _, files in os.walk(artifacts_dir):
for fn in files:
src = os.path.join(root, fn)
rel = os.path.relpath(src, artifacts_dir)
dst = os.path.join(dst_files_dir, rel)
# If a file was successfully templatised by JinjaTurtle, do NOT # If a file was successfully templatised by JinjaTurtle, do NOT
# also materialise the raw copy in the destination files dir. # also materialise the raw copy in the destination files dir.
if exclude_rels and rel in exclude_rels: if exclude_rels and rel in exclude_rels:
try: try:
if os.path.isfile(dst): if os.path.isfile(dst):
os.remove(dst) os.remove(dst)
except Exception: except Exception:
pass # nosec pass # nosec
continue continue
if preserve_existing and os.path.exists(dst): if preserve_existing and os.path.exists(dst):
continue continue
os.makedirs(os.path.dirname(dst), exist_ok=True) os.makedirs(os.path.dirname(dst), exist_ok=True)
_copy2_replace(src, dst) _copy2_replace(str(src), dst)
def _write_role_scaffold(role_dir: str) -> None: def _write_role_scaffold(role_dir: str) -> None:

170
enroll/manifest_safety.py Normal file
View file

@ -0,0 +1,170 @@
from __future__ import annotations
import os
import shutil
import stat
from pathlib import Path
from typing import Iterator, Tuple
class ArtifactSafetyError(RuntimeError):
"""Raised when a harvest artifact path is unsafe to consume."""
class ManifestOutputError(RuntimeError):
"""Raised when a manifest output path is unsafe to use."""
def _safe_relative_path(value: str, *, field: str) -> Path:
text = str(value or "").strip()
if not text:
raise ArtifactSafetyError(f"empty {field}")
if "\x00" in text:
raise ArtifactSafetyError(f"{field} contains NUL byte: {text!r}")
p = Path(text)
if p.is_absolute():
raise ArtifactSafetyError(f"{field} must be relative: {text!r}")
if any(part in {"", ".", ".."} for part in p.parts):
raise ArtifactSafetyError(f"{field} contains unsafe path component: {text!r}")
return p
def prepare_manifest_output_dir(
out_dir: str | Path, *, allow_existing: bool = False
) -> Path:
"""Create a manifest output directory, refusing to overwrite anything.
Rendering a manifest may be run by root and may target configuration-
management trees. Refuse an existing path rather than deleting or merging
with it by default; callers that intentionally support accumulation, such
as --fqdn site mode, may allow an existing directory but never a symlink or
non-directory path.
"""
out = Path(out_dir).expanduser()
if os.path.lexists(out):
if not allow_existing:
raise ManifestOutputError(
"manifest output path already exists; refusing to overwrite: " f"{out}"
)
st = out.lstat()
if stat.S_ISLNK(st.st_mode):
raise ManifestOutputError(
f"manifest output path is a symlink; refusing to use: {out}"
)
if not out.is_dir():
raise ManifestOutputError(
f"manifest output path exists but is not a directory: {out}"
)
return out
out.mkdir(parents=True, exist_ok=False)
return out
def _assert_no_symlink_components(path: Path, *, root: Path) -> None:
"""Reject symlinks in any existing path component between root and path."""
try:
rel = path.relative_to(root)
except ValueError as e:
raise ArtifactSafetyError(f"artifact path escapes artifact root: {path}") from e
cur = root
for part in rel.parts:
cur = cur / part
try:
st = cur.lstat()
except FileNotFoundError:
# Missing components are handled by the final caller where relevant.
return
if stat.S_ISLNK(st.st_mode):
raise ArtifactSafetyError(f"artifact path contains symlink: {cur}")
def safe_artifact_file(bundle_dir: str | Path, role: str, src_rel: str) -> Path:
"""Return a harvested artifact file path only if it is safe to copy.
The path must remain under artifacts/<role>, contain no absolute or '..'
components, contain no symlinks in any path component, and refer to a
regular, non-hardlinked file. This deliberately mirrors the tar extraction
hardening used for remote/SOPS/plain tarball bundles, but applies it to
directory bundles too.
"""
role_path = _safe_relative_path(role, field="artifact role")
src_path = _safe_relative_path(src_rel, field="artifact src_rel")
artifacts_root = Path(bundle_dir).expanduser() / "artifacts"
root = artifacts_root / role_path
candidate = root / src_path
if artifacts_root.exists():
st = artifacts_root.lstat()
if stat.S_ISLNK(st.st_mode):
raise ArtifactSafetyError(
f"artifacts directory is a symlink: {artifacts_root}"
)
if root.exists():
_assert_no_symlink_components(root, root=artifacts_root)
_assert_no_symlink_components(candidate, root=artifacts_root)
try:
st = candidate.lstat()
except FileNotFoundError:
raise
if stat.S_ISLNK(st.st_mode):
raise ArtifactSafetyError(f"artifact is a symlink: {candidate}")
if not stat.S_ISREG(st.st_mode):
raise ArtifactSafetyError(f"artifact is not a regular file: {candidate}")
if st.st_nlink > 1:
raise ArtifactSafetyError(f"artifact is hardlinked: {candidate}")
resolved_root = artifacts_root.resolve(strict=True)
resolved_candidate = candidate.resolve(strict=True)
try:
resolved_candidate.relative_to(resolved_root)
except ValueError as e:
raise ArtifactSafetyError(
f"artifact path escapes artifact root: {candidate}"
) from e
return candidate
def iter_safe_artifact_files(
bundle_dir: str | Path, role: str
) -> Iterator[Tuple[Path, str]]:
"""Yield safe artifact files for a role as (path, src_rel)."""
role_path = _safe_relative_path(role, field="artifact role")
artifacts_dir = Path(bundle_dir).expanduser() / "artifacts" / role_path
if not artifacts_dir.exists():
return
if not artifacts_dir.is_dir():
raise ArtifactSafetyError(
f"artifact role path is not a directory: {artifacts_dir}"
)
for root, dirs, files in os.walk(artifacts_dir, followlinks=False):
root_p = Path(root)
for dirname in list(dirs):
p = root_p / dirname
try:
st = p.lstat()
except FileNotFoundError:
continue
if stat.S_ISLNK(st.st_mode):
raise ArtifactSafetyError(f"artifact directory is a symlink: {p}")
for filename in files:
p = root_p / filename
rel = p.relative_to(artifacts_dir).as_posix()
yield safe_artifact_file(bundle_dir, role, rel), rel
def copy_safe_artifact_file(src: str | Path, dst: str | Path) -> None:
"""Copy an already validated artifact file without following symlinks."""
shutil.copy2(src, dst, follow_symlinks=False)

View file

@ -4,7 +4,6 @@ import hashlib
import json import json
import re import re
import shlex import shlex
import shutil
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
@ -16,6 +15,11 @@ from .cm import (
role_order_key, role_order_key,
markdown_list, markdown_list,
) )
from .manifest_safety import (
copy_safe_artifact_file,
prepare_manifest_output_dir,
safe_artifact_file,
)
from .state import inventory_packages_from_state, roles_from_state from .state import inventory_packages_from_state, roles_from_state
from .jinjaturtle import ( from .jinjaturtle import (
can_jinjify_path, can_jinjify_path,
@ -628,13 +632,14 @@ def _copy_artifact(
) -> Optional[str]: ) -> Optional[str]:
if not role or not src_rel: if not role or not src_rel:
return None return None
src = Path(bundle_dir) / "artifacts" / role / src_rel try:
if not src.is_file(): src = safe_artifact_file(bundle_dir, role, src_rel)
except FileNotFoundError:
return None return None
module_rel = Path(dst_prefix or "") / src_rel module_rel = Path(dst_prefix or "") / src_rel
dst = dst_files_dir / module_rel dst = dst_files_dir / module_rel
dst.parent.mkdir(parents=True, exist_ok=True) dst.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dst) copy_safe_artifact_file(src, dst)
return module_rel.as_posix() return module_rel.as_posix()
@ -1712,10 +1717,8 @@ class PuppetManifestRenderer:
no_common_roles = self.no_common_roles no_common_roles = self.no_common_roles
state = PuppetRole.load_state(bundle_dir) state = PuppetRole.load_state(bundle_dir)
out = Path(out_dir)
hiera_mode = bool(fqdn) hiera_mode = bool(fqdn)
if out.exists() and not hiera_mode: out = prepare_manifest_output_dir(out_dir, allow_existing=hiera_mode)
shutil.rmtree(out)
manifests_dir = out / "manifests" manifests_dir = out / "manifests"
modules_dir = out / "modules" modules_dir = out / "modules"
manifests_dir.mkdir(parents=True, exist_ok=True) manifests_dir.mkdir(parents=True, exist_ok=True)

View file

@ -17,6 +17,11 @@ from .cm import (
markdown_list, markdown_list,
) )
from .jinjaturtle import jinjify_artifact, resolve_jinjaturtle_mode from .jinjaturtle import jinjify_artifact, resolve_jinjaturtle_mode
from .manifest_safety import (
copy_safe_artifact_file,
prepare_manifest_output_dir,
safe_artifact_file,
)
from .state import inventory_packages_from_state, roles_from_state from .state import inventory_packages_from_state, roles_from_state
from .yamlutil import yaml_dump_mapping, yaml_load_mapping_file from .yamlutil import yaml_dump_mapping, yaml_load_mapping_file
@ -610,13 +615,14 @@ def _copy_artifact(
) -> Optional[str]: ) -> Optional[str]:
if not role or not src_rel: if not role or not src_rel:
return None return None
src = Path(bundle_dir) / "artifacts" / role / src_rel try:
if not src.is_file(): src = safe_artifact_file(bundle_dir, role, src_rel)
except FileNotFoundError:
return None return None
role_rel = Path(dst_prefix or "") / src_rel role_rel = Path(dst_prefix or "") / src_rel
dst = dst_files_dir / role_rel dst = dst_files_dir / role_rel
dst.parent.mkdir(parents=True, exist_ok=True) dst.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dst) copy_safe_artifact_file(src, dst)
return role_rel.as_posix() return role_rel.as_posix()
@ -1687,13 +1693,11 @@ class SaltManifestRenderer:
def render(self) -> None: def render(self) -> None:
state = SaltRole.load_state(self.bundle_dir) state = SaltRole.load_state(self.bundle_dir)
out = Path(self.out_dir) fqdn_mode = bool(self.fqdn)
out = prepare_manifest_output_dir(self.out_dir, allow_existing=fqdn_mode)
states_dir = out / "states" states_dir = out / "states"
pillar_dir = out / "pillar" pillar_dir = out / "pillar"
fqdn_mode = bool(self.fqdn)
if out.exists() and not fqdn_mode:
shutil.rmtree(out)
states_dir.mkdir(parents=True, exist_ok=True) states_dir.mkdir(parents=True, exist_ok=True)
if fqdn_mode: if fqdn_mode:
pillar_dir.mkdir(parents=True, exist_ok=True) pillar_dir.mkdir(parents=True, exist_ok=True)

View file

@ -1,6 +1,8 @@
from __future__ import annotations from __future__ import annotations
import json import json
import os
import stat
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -9,6 +11,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple
import jsonschema import jsonschema
from .diff import BundleRef, _bundle_from_input from .diff import BundleRef, _bundle_from_input
from .manifest_safety import ArtifactSafetyError, safe_artifact_file
from .state import load_state from .state import load_state
@ -171,7 +174,7 @@ def validate_harvest(
except Exception as e: # noqa: BLE001 except Exception as e: # noqa: BLE001
errors.append(f"failed to load/validate schema: {e!r}") errors.append(f"failed to load/validate schema: {e!r}")
# Artifact existence checks # Artifact existence and safety checks.
artifacts_dir = bundle.dir / "artifacts" artifacts_dir = bundle.dir / "artifacts"
referenced: Set[Tuple[str, str]] = set() referenced: Set[Tuple[str, str]] = set()
for role_name, mf in _iter_managed_files(state): for role_name, mf in _iter_managed_files(state):
@ -188,15 +191,15 @@ def validate_harvest(
continue continue
referenced.add((role_name, src_rel)) referenced.add((role_name, src_rel))
p = artifacts_dir / role_name / src_rel try:
if not p.exists(): safe_artifact_file(bundle.dir, role_name, src_rel)
except FileNotFoundError:
errors.append( errors.append(
f"missing artifact for role {role_name}: artifacts/{role_name}/{src_rel}" f"missing artifact for role {role_name}: artifacts/{role_name}/{src_rel}"
) )
continue except ArtifactSafetyError as e:
if not p.is_file():
errors.append( errors.append(
f"artifact is not a file for role {role_name}: artifacts/{role_name}/{src_rel}" f"unsafe artifact for role {role_name}: artifacts/{role_name}/{src_rel}: {e}"
) )
# Runtime firewall snapshots are generated artifacts rather than managed files. # Runtime firewall snapshots are generated artifacts rather than managed files.
@ -211,43 +214,83 @@ def validate_harvest(
f"firewall_runtime {key} has suspicious src_rel: {src_rel!r}" f"firewall_runtime {key} has suspicious src_rel: {src_rel!r}"
) )
continue continue
referenced.add( role_name = str(fw.get("role_name") or "firewall_runtime")
(str(fw.get("role_name") or "firewall_runtime"), src_rel) referenced.add((role_name, src_rel))
) try:
p = ( safe_artifact_file(bundle.dir, role_name, src_rel)
artifacts_dir except FileNotFoundError:
/ str(fw.get("role_name") or "firewall_runtime")
/ src_rel
)
if not p.exists():
errors.append( errors.append(
"missing firewall runtime artifact: " "missing firewall runtime artifact: "
f"artifacts/{fw.get('role_name') or 'firewall_runtime'}/{src_rel}" f"artifacts/{role_name}/{src_rel}"
) )
elif not p.is_file(): except ArtifactSafetyError as e:
errors.append( errors.append(
"firewall runtime artifact is not a file: " "unsafe firewall runtime artifact: "
f"artifacts/{fw.get('role_name') or 'firewall_runtime'}/{src_rel}" f"artifacts/{role_name}/{src_rel}: {e}"
) )
# Warn if there are extra files in artifacts not referenced. # Validate the whole artifact tree too, so unreferenced symlinks,
# hardlinks, special files, and path-shaping tricks do not survive
# validation simply because no managed_file currently references them.
if artifacts_dir.exists() and artifacts_dir.is_dir(): if artifacts_dir.exists() and artifacts_dir.is_dir():
for fp in artifacts_dir.rglob("*"): for root, dirs, files in os.walk(artifacts_dir, followlinks=False):
if not fp.is_file(): root_p = Path(root)
continue for name in list(dirs):
try: fp = root_p / name
rel = fp.relative_to(artifacts_dir) try:
except ValueError: st = fp.lstat()
continue except FileNotFoundError:
parts = rel.parts continue
if len(parts) < 2: if stat.S_ISLNK(st.st_mode):
continue errors.append(f"artifact directory is a symlink: {fp}")
role_name = parts[0] elif not stat.S_ISDIR(st.st_mode):
src_rel = "/".join(parts[1:]) errors.append(f"artifact directory is not a directory: {fp}")
if (role_name, src_rel) not in referenced:
warnings.append( for name in files:
f"unreferenced artifact present: artifacts/{role_name}/{src_rel}" fp = root_p / name
) try:
st = fp.lstat()
except FileNotFoundError:
continue
try:
rel = fp.relative_to(artifacts_dir)
except ValueError:
errors.append(f"artifact escapes artifact root: {fp}")
continue
parts = rel.parts
if len(parts) < 2:
errors.append(f"artifact is not under a role directory: {fp}")
continue
role_name = parts[0]
src_rel = "/".join(parts[1:])
if stat.S_ISLNK(st.st_mode):
errors.append(
f"artifact is a symlink: artifacts/{role_name}/{src_rel}"
)
continue
if not stat.S_ISREG(st.st_mode):
errors.append(
f"artifact is not a regular file: artifacts/{role_name}/{src_rel}"
)
continue
if st.st_nlink > 1:
errors.append(
f"artifact is hardlinked: artifacts/{role_name}/{src_rel}"
)
continue
try:
safe_artifact_file(bundle.dir, role_name, src_rel)
except (FileNotFoundError, ArtifactSafetyError) as e:
errors.append(
f"unsafe artifact: artifacts/{role_name}/{src_rel}: {e}"
)
continue
if (role_name, src_rel) not in referenced:
warnings.append(
f"unreferenced artifact present: artifacts/{role_name}/{src_rel}"
)
return ValidationResult(errors=errors, warnings=warnings) return ValidationResult(errors=errors, warnings=warnings)
finally: finally: