Ensure paths are not followed through parent links

This commit is contained in:
Miguel Jacq 2026-06-22 15:32:40 +10:00
parent e10a3f62b0
commit 07b07e60c5
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
9 changed files with 323 additions and 23 deletions

View file

@ -146,7 +146,12 @@ def is_human_user(uid: int, shell: str, uid_min: int) -> bool:
def find_user_ssh_files(home: str) -> List[str]: def find_user_ssh_files(home: str) -> List[str]:
sshdir = os.path.join(home, ".ssh") sshdir = os.path.join(home, ".ssh")
out: List[str] = [] out: List[str] = []
if not os.path.isdir(sshdir): # ``os.path.isdir`` follows symlinks, so a user who replaces ``~/.ssh``
# with a link to a sensitive directory (e.g. /etc/ssl/private) could
# otherwise have a regular file inside it harvested through the symlinked
# parent. Refuse a symlinked .ssh outright; capture_file() applies the
# same parent-symlink protection at copy time as defense in depth.
if os.path.islink(sshdir) or not os.path.isdir(sshdir):
return out return out
ak = os.path.join(sshdir, "authorized_keys") ak = os.path.join(sshdir, "authorized_keys")

View file

@ -5,7 +5,7 @@ import errno
import stat import stat
from typing import List, Optional, Set from typing import List, Optional, Set
from .fsutil import stat_triplet, stat_triplet_from_stat from .fsutil import open_no_follow_path, stat_triplet, stat_triplet_from_stat
from .harvest_types import ExcludedFile, ManagedFile, ManagedLink from .harvest_types import ExcludedFile, ManagedFile, ManagedLink
from .ignore import IgnorePolicy from .ignore import IgnorePolicy
from .pathfilter import PathFilter from .pathfilter import PathFilter
@ -55,10 +55,7 @@ def files_differ(a: str, b: str, *, max_bytes: int = 2_000_000) -> bool:
def _open_no_follow_write(path: str, mode: int = 0o600) -> int: def _open_no_follow_write(path: str, mode: int = 0o600) -> int:
flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_CLOEXEC", 0) return open_no_follow_path(path, write=True, mode=mode)
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
return os.open(path, flags, mode)
def write_bytes_into_bundle( def write_bytes_into_bundle(
@ -92,14 +89,10 @@ def copy_into_bundle(
symlinks at copy time and refuses destination symlink overwrites. symlinks at copy time and refuses destination symlink overwrites.
""" """
flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0)
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd = -1 fd = -1
try: try:
try: try:
fd = os.open(abs_path, flags) fd = open_no_follow_path(abs_path)
except OSError as e: except OSError as e:
if e.errno in {errno.ELOOP, errno.ENOTDIR}: if e.errno in {errno.ELOOP, errno.ENOTDIR}:
raise OSError("refusing to copy symlink source") from e raise OSError("refusing to copy symlink source") from e

View file

@ -189,6 +189,12 @@ _EXCLUDED_REASONS: Dict[str, ReasonInfo] = {
"Not a regular file", "Not a regular file",
"Excluded because it was not a regular file (device, socket, etc.).", "Excluded because it was not a regular file (device, socket, etc.).",
), ),
"symlink_component": ReasonInfo(
"Unsafe symlinked path",
"Excluded because a directory in the path was a symlink, which could "
"redirect capture into a sensitive location; Enroll refuses to follow "
"symlinked parents when harvesting files.",
),
"binary_like": ReasonInfo( "binary_like": ReasonInfo(
"Binary-like", "Binary-like",
"Excluded because it looked like binary content (not useful for config management).", "Excluded because it looked like binary content (not useful for config management).",

View file

@ -1,10 +1,131 @@
from __future__ import annotations from __future__ import annotations
import errno
import hashlib import hashlib
import os import os
import stat
from typing import Tuple from typing import Tuple
def open_no_follow_path(path: str, *, write: bool = False, mode: int = 0o600) -> int:
"""Open ``path`` without following a symlink in *any* path component.
``O_NOFOLLOW`` only protects the final component of a path. A regular
file reached through a symlinked *parent* directory (for example a user
replacing ``~/.ssh`` with a link to a sensitive directory) would still be
opened by a plain ``os.open(path, O_NOFOLLOW)``.
This helper resolves the path one component at a time with ``openat``
semantics:
- each intermediate component is opened relative to its parent's
descriptor without following symlinks;
- the final component is opened with ``O_NOFOLLOW`` (read, or
``O_WRONLY | O_CREAT | O_EXCL`` when ``write`` is True).
The important detail is that intermediate components are opened with
``O_PATH | O_NOFOLLOW`` when ``O_PATH`` is available, and then verified
with ``fstat()``. On Linux, ``O_RDONLY | O_DIRECTORY | O_NOFOLLOW`` is not
sufficient for this job: a symlink whose target is a directory can still be
opened as the target directory on some kernels. Opening with ``O_PATH`` and
checking the resulting descriptor reliably exposes such a component as a
symlink instead.
A symlink (or a ``..`` component) anywhere in the path raises
``OSError(ELOOP)``. On platforms without ``openat``/``O_DIRECTORY``
support, this falls back to a single ``O_NOFOLLOW`` open of the whole path,
which is no worse than the historical behaviour.
"""
cloexec = getattr(os, "O_CLOEXEC", 0)
nofollow = getattr(os, "O_NOFOLLOW", 0)
o_directory = getattr(os, "O_DIRECTORY", 0)
o_path = getattr(os, "O_PATH", 0)
if write:
final_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | cloexec | nofollow
else:
final_flags = os.O_RDONLY | cloexec | nofollow
supports_openat = bool(
o_directory and nofollow and os.open in getattr(os, "supports_dir_fd", set())
)
if not supports_openat:
return os.open(path, final_flags, mode)
absolute = path.startswith("/")
parts = [p for p in path.split("/") if p not in ("", ".")]
if not parts:
return os.open(path, final_flags, mode)
*parent_parts, leaf = parts
# Use O_PATH for directory descriptors when available. O_PATH descriptors
# can be used as dir_fd anchors for later openat-style calls, and with
# O_NOFOLLOW they let us fstat() a symlink component instead of silently
# following it. If O_PATH is unavailable, use O_RDONLY and an lstat()
# pre-check for intermediate components as a best-effort fallback.
dir_base_flags = (o_path if o_path else os.O_RDONLY) | cloexec | o_directory
component_flags = (
(o_path if o_path else os.O_RDONLY) | cloexec | o_directory | nofollow
)
dir_fd = os.open("/" if absolute else ".", dir_base_flags)
try:
for component in parent_parts:
if component == "..":
raise OSError(errno.ELOOP, "unsafe '..' path component", path)
if not o_path:
# Best-effort fallback for platforms without O_PATH. This is not
# as race-resistant as the descriptor-only path, but it avoids
# known symlink parents where we cannot open the component itself
# as a non-followed O_PATH descriptor.
try:
st = os.lstat(component, dir_fd=dir_fd)
except OSError:
raise
if stat.S_ISLNK(st.st_mode):
raise OSError(errno.ELOOP, "symlinked path component", path)
if not stat.S_ISDIR(st.st_mode):
raise OSError(errno.ENOTDIR, "non-directory path component", path)
try:
next_fd = os.open(component, component_flags, dir_fd=dir_fd)
except OSError as e:
if e.errno in {errno.ELOOP, errno.ENOTDIR}:
try:
st = os.lstat(component, dir_fd=dir_fd)
except OSError:
raise
if stat.S_ISLNK(st.st_mode):
raise OSError(
errno.ELOOP,
"symlinked path component",
path,
) from e
raise
try:
st = os.fstat(next_fd)
if stat.S_ISLNK(st.st_mode):
raise OSError(errno.ELOOP, "symlinked path component", path)
if not stat.S_ISDIR(st.st_mode):
raise OSError(errno.ENOTDIR, "non-directory path component", path)
except Exception:
os.close(next_fd)
raise
os.close(dir_fd)
dir_fd = next_fd
if leaf == "..":
raise OSError(errno.ELOOP, "unsafe '..' path component", path)
return os.open(leaf, final_flags, mode, dir_fd=dir_fd)
finally:
os.close(dir_fd)
def stat_triplet_from_stat(st: os.stat_result) -> Tuple[str, str, str]: def stat_triplet_from_stat(st: os.stat_result) -> Tuple[str, str, str]:
"""Return (owner, group, mode) for an existing stat result.""" """Return (owner, group, mode) for an existing stat result."""

View file

@ -8,6 +8,8 @@ import stat
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional from typing import Optional
from .fsutil import open_no_follow_path
DEFAULT_DENY_GLOBS = [ DEFAULT_DENY_GLOBS = [
# Common backup copies created by passwd tools (can contain sensitive data) # Common backup copies created by passwd tools (can contain sensitive data)
@ -213,26 +215,33 @@ class IgnorePolicy:
def inspect_file(self, path: str) -> tuple[Optional[str], Optional[FileInspection]]: def inspect_file(self, path: str) -> tuple[Optional[str], Optional[FileInspection]]:
"""Safely inspect a regular file and return the exact bytes to copy. """Safely inspect a regular file and return the exact bytes to copy.
The source is opened with O_NOFOLLOW where available, fstat() is taken The source is opened with O_NOFOLLOW on every path component (see
from that file descriptor, and the whole file is read only after the ``fsutil.open_no_follow_path``), fstat() is taken from that file
size cap passes. With the default 256 KiB cap this avoids a memory DoS descriptor, and the whole file is read only after the size cap passes.
while ensuring secret scanning covers every byte that may be copied. With the default 256 KiB cap this avoids a memory DoS while ensuring
secret scanning covers every byte that may be copied.
Opening every component without following symlinks means a regular
file reached through a symlinked *parent* directory is refused with
``symlink_component`` rather than silently captured -- its logical
path would not have matched the deny globs.
""" """
deny = self._path_deny_reason(path) deny = self._path_deny_reason(path)
if deny: if deny:
return deny, None return deny, None
flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0)
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
fd: Optional[int] = None fd: Optional[int] = None
try: try:
try: try:
fd = os.open(path, flags) fd = open_no_follow_path(path)
except OSError as e: except OSError as e:
if e.errno in {errno.ELOOP, errno.ENOTDIR}: if e.errno == errno.ELOOP:
# A symlink (or unsafe '..') somewhere in the path. This is
# distinct from "not a regular file" so operators can see
# why a path under a symlinked parent was skipped.
return "symlink_component", None
if e.errno == errno.ENOTDIR:
return "not_regular_file", None return "not_regular_file", None
return "unreadable", None return "unreadable", None

View file

@ -221,6 +221,25 @@ def test_find_user_ssh_files_ignores_symlink(tmp_path: Path):
assert result == [] assert result == []
def test_find_user_ssh_files_ignores_symlinked_ssh_dir(tmp_path: Path):
"""A user who replaces ~/.ssh with a symlink to a sensitive directory must
not have files inside it harvested through the symlinked parent. os.path.isdir
follows symlinks, so the directory itself must be checked with islink().
"""
from enroll.accounts import find_user_ssh_files
sensitive = tmp_path / "sensitive"
sensitive.mkdir()
(sensitive / "authorized_keys").write_text("ssh-rsa AAAA...\n", encoding="utf-8")
home = tmp_path / "home" / "mallory"
home.mkdir(parents=True)
os.symlink(str(sensitive), str(home / ".ssh"))
assert find_user_ssh_files(str(home)) == []
def test_find_user_ssh_files_handles_home_not_starting_with_slash(): def test_find_user_ssh_files_handles_home_not_starting_with_slash():
from enroll.accounts import find_user_ssh_files from enroll.accounts import find_user_ssh_files

View file

@ -23,3 +23,54 @@ def test_stat_triplet_reports_mode(tmp_path: Path):
assert mode == "0600" assert mode == "0600"
assert owner # non-empty string assert owner # non-empty string
assert group # non-empty string assert group # non-empty string
def test_open_no_follow_path_reads_regular_file(tmp_path: Path):
from enroll.fsutil import open_no_follow_path
nested = tmp_path / "a" / "b"
nested.mkdir(parents=True)
f = nested / "file.txt"
f.write_text("hello\n", encoding="utf-8")
fd = open_no_follow_path(str(f))
try:
assert os.read(fd, 100) == b"hello\n"
finally:
os.close(fd)
def test_open_no_follow_path_refuses_symlinked_parent(tmp_path: Path):
import errno
from enroll.fsutil import open_no_follow_path
real = tmp_path / "real"
real.mkdir()
(real / "file.txt").write_text("x\n", encoding="utf-8")
(tmp_path / "link").symlink_to(real)
try:
fd = open_no_follow_path(str(tmp_path / "link" / "file.txt"))
os.close(fd)
raise AssertionError("expected OSError for symlinked parent")
except OSError as e:
assert e.errno == errno.ELOOP
def test_open_no_follow_path_refuses_symlinked_leaf(tmp_path: Path):
import errno
from enroll.fsutil import open_no_follow_path
target = tmp_path / "target.txt"
target.write_text("x\n", encoding="utf-8")
link = tmp_path / "link.txt"
link.symlink_to(target)
try:
fd = open_no_follow_path(str(link))
os.close(fd)
raise AssertionError("expected OSError for symlinked leaf")
except OSError as e:
assert e.errno == errno.ELOOP

View file

@ -102,7 +102,45 @@ def test_capture_file_rejects_symlink_source_with_ignore_policy(tmp_path: Path):
assert ok is False assert ok is False
assert managed == [] assert managed == []
assert excluded and excluded[0].reason == "not_regular_file" # Symlinked sources are now reported with the dedicated symlink_component
# reason (covers both symlinked leaves and symlinked parent directories),
# which is more precise than the old generic not_regular_file.
assert excluded and excluded[0].reason == "symlink_component"
def test_capture_file_rejects_symlinked_parent_with_ignore_policy(tmp_path: Path):
"""O_NOFOLLOW only guards the final component. A regular file reached
through a symlinked *parent* directory must still be refused, otherwise a
file whose real location is deny-globbed could be captured while its
logical (recorded) path looks safe.
"""
secret = tmp_path / "secretroot"
secret.mkdir()
(secret / "config").write_text("listen_port=8080\n", encoding="utf-8")
(tmp_path / "allowed").symlink_to(secret, target_is_directory=True)
bundle = tmp_path / "bundle"
bundle.mkdir()
managed: list[ManagedFile] = []
excluded: list[ExcludedFile] = []
ok = capture_file(
bundle_dir=str(bundle),
role_name="role",
abs_path=str(tmp_path / "allowed" / "config"),
reason="test",
policy=IgnorePolicy(),
path_filter=PathFilter(),
managed_out=managed,
excluded_out=excluded,
)
assert ok is False
assert managed == []
assert excluded and excluded[0].reason == "symlink_component"
# Nothing should have been written into the bundle.
artifact = bundle / "artifacts" / "role" / "allowed" / "config"
assert not artifact.exists()
def test_prepare_new_private_dir_rejects_symlink_parent(tmp_path: Path): def test_prepare_new_private_dir_rejects_symlink_parent(tmp_path: Path):

View file

@ -236,7 +236,10 @@ def test_deny_reason_symlink_file(tmp_path: Path):
link = tmp_path / "link" link = tmp_path / "link"
os.symlink(str(real_file), str(link)) os.symlink(str(real_file), str(link))
reason = pol.deny_reason(str(link)) reason = pol.deny_reason(str(link))
assert reason == "not_regular_file" # A symlinked path (final component or parent) is refused with the
# dedicated symlink_component reason so operators can tell symlink
# redirection apart from genuine non-regular files (sockets, devices).
assert reason == "symlink_component"
def test_deny_reason_logs(tmp_path: Path): def test_deny_reason_logs(tmp_path: Path):
@ -358,3 +361,58 @@ def test_noncanonical_backup_and_log_fastpaths():
assert pol._path_deny_reason("/var/log/foo/../bar.log") == "log_file" assert pol._path_deny_reason("/var/log/foo/../bar.log") == "log_file"
assert pol._path_deny_reason("/etc/foo/../something~") == "backup_file" assert pol._path_deny_reason("/etc/foo/../something~") == "backup_file"
assert pol._path_deny_reason("/etc//passwd-") == "backup_file" assert pol._path_deny_reason("/etc//passwd-") == "backup_file"
def test_inspect_file_refuses_symlinked_parent_directory(tmp_path: Path):
"""A regular file reached through a symlinked *parent* directory must be
refused, even though O_NOFOLLOW alone would only guard the final
component. Otherwise a file whose real location is deny-globbed (or whose
content is benign) could be captured while its logical path looks safe.
"""
pol = IgnorePolicy()
secret = tmp_path / "secretroot"
secret.mkdir()
(secret / "config").write_text("listen_port=8080\n", encoding="utf-8")
(tmp_path / "allowed").symlink_to(secret)
reason, inspection = pol.inspect_file(str(tmp_path / "allowed" / "config"))
assert reason == "symlink_component"
assert inspection is None
def test_inspect_file_refuses_denyglob_evasion_via_symlinked_parent(tmp_path: Path):
"""The strongest variant: the real file lives under a deny-globbed dir,
but is reached via a symlinked parent so the *logical* path does not match
the deny glob. Content is non-secret-looking (DH params), so only the
parent-symlink check stands between the operator and disclosure.
"""
pol = IgnorePolicy()
realdir = tmp_path / "ssl_private"
realdir.mkdir()
(realdir / "dhparam.pem").write_text(
"-----BEGIN DH PARAMETERS-----\nMII...\n-----END DH PARAMETERS-----\n",
encoding="utf-8",
)
(tmp_path / "innocent").symlink_to(realdir)
reason, inspection = pol.inspect_file(str(tmp_path / "innocent" / "dhparam.pem"))
assert reason == "symlink_component"
assert inspection is None
def test_inspect_file_still_captures_normal_nested_file(tmp_path: Path):
"""Regression guard: ordinary files in real (non-symlinked) directories
must still be inspected and returned.
"""
pol = IgnorePolicy()
nested = tmp_path / "etc" / "myapp"
nested.mkdir(parents=True)
(nested / "app.conf").write_text("workers=4\n", encoding="utf-8")
reason, inspection = pol.inspect_file(str(nested / "app.conf"))
assert reason is None
assert inspection is not None
assert inspection.data == b"workers=4\n"