Belts and braces: normalise paths before globbing
This commit is contained in:
parent
c4448226c0
commit
e10a3f62b0
2 changed files with 97 additions and 10 deletions
|
|
@ -97,6 +97,34 @@ BLOCK_START = b"/*"
|
||||||
BLOCK_END = b"*/"
|
BLOCK_END = b"*/"
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_for_match(path: str) -> str:
|
||||||
|
"""Lexically normalize a path string for deny/allow glob matching.
|
||||||
|
|
||||||
|
This collapses redundant separators ("//"), resolves "." and ".."
|
||||||
|
segments, and strips trailing slashes using ``os.path.normpath`` -- a
|
||||||
|
pure string operation that never touches the filesystem.
|
||||||
|
|
||||||
|
It is deliberately NOT ``os.path.realpath``/``Path.resolve``: resolving
|
||||||
|
symlinks would stat the filesystem and reintroduce a time-of-check /
|
||||||
|
time-of-use window before the later ``O_NOFOLLOW`` open in
|
||||||
|
``inspect_file``. The goal here is only to stop a non-canonical *string*
|
||||||
|
(e.g. "/etc//shadow" or "/etc/foo/../shadow") from slipping past a deny
|
||||||
|
glob like "/etc/shadow". It is defense-in-depth on top of the no-follow
|
||||||
|
open, not a load-bearing control by itself.
|
||||||
|
|
||||||
|
``normpath`` preserves a leading "//" because POSIX treats it as
|
||||||
|
implementation-defined; for glob matching we collapse it to a single
|
||||||
|
leading slash so patterns anchored at "/" still match.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not path:
|
||||||
|
return path
|
||||||
|
normalized = os.path.normpath(path)
|
||||||
|
if normalized.startswith("//") and not normalized.startswith("///"):
|
||||||
|
normalized = normalized[1:]
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class FileInspection:
|
class FileInspection:
|
||||||
"""Bytes and metadata captured from one safely-opened source file."""
|
"""Bytes and metadata captured from one safely-opened source file."""
|
||||||
|
|
@ -145,26 +173,31 @@ class IgnorePolicy:
|
||||||
yield raw
|
yield raw
|
||||||
|
|
||||||
def _path_deny_reason(self, path: str) -> Optional[str]:
|
def _path_deny_reason(self, path: str) -> Optional[str]:
|
||||||
|
# Match against a lexically-normalized path so non-canonical spellings
|
||||||
|
# (e.g. "/etc//shadow", "/etc/foo/../shadow") cannot slip past a deny
|
||||||
|
# glob. The original path is still what gets opened/recorded.
|
||||||
|
match_path = normalize_for_match(path)
|
||||||
# Always ignore plain *.log files (rarely useful as config, often noisy).
|
# Always ignore plain *.log files (rarely useful as config, often noisy).
|
||||||
if path.endswith(".log"):
|
if match_path.endswith(".log"):
|
||||||
return "log_file"
|
return "log_file"
|
||||||
# Ignore editor/backup files that end with a trailing tilde.
|
# Ignore editor/backup files that end with a trailing tilde.
|
||||||
if path.endswith("~"):
|
if match_path.endswith("~"):
|
||||||
return "backup_file"
|
return "backup_file"
|
||||||
# Ignore backup shadow files
|
# Ignore backup shadow files
|
||||||
if path.startswith("/etc/") and path.endswith("-"):
|
if match_path.startswith("/etc/") and match_path.endswith("-"):
|
||||||
return "backup_file"
|
return "backup_file"
|
||||||
|
|
||||||
if not self.dangerous:
|
if not self.dangerous:
|
||||||
for g in self.deny_globs or []:
|
for g in self.deny_globs or []:
|
||||||
if fnmatch.fnmatch(path, g):
|
if fnmatch.fnmatch(match_path, g):
|
||||||
return "denied_path"
|
return "denied_path"
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _content_deny_reason(self, path: str, data: bytes) -> Optional[str]:
|
def _content_deny_reason(self, path: str, data: bytes) -> Optional[str]:
|
||||||
if b"\x00" in data:
|
if b"\x00" in data:
|
||||||
|
match_path = normalize_for_match(path)
|
||||||
for g in self.allow_binary_globs or []:
|
for g in self.allow_binary_globs or []:
|
||||||
if fnmatch.fnmatch(path, g):
|
if fnmatch.fnmatch(match_path, g):
|
||||||
# Binary is acceptable for explicitly-allowed paths.
|
# Binary is acceptable for explicitly-allowed paths.
|
||||||
return None
|
return None
|
||||||
return "binary_like"
|
return "binary_like"
|
||||||
|
|
@ -251,8 +284,9 @@ class IgnorePolicy:
|
||||||
No size checks or content scanning are performed for directories.
|
No size checks or content scanning are performed for directories.
|
||||||
"""
|
"""
|
||||||
if not self.dangerous:
|
if not self.dangerous:
|
||||||
|
match_path = normalize_for_match(path)
|
||||||
for g in self.deny_globs or []:
|
for g in self.deny_globs or []:
|
||||||
if fnmatch.fnmatch(path, g):
|
if fnmatch.fnmatch(match_path, g):
|
||||||
return "denied_path"
|
return "denied_path"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -283,16 +317,17 @@ class IgnorePolicy:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Keep the same fast-path filename ignores as deny_reason().
|
# Keep the same fast-path filename ignores as deny_reason().
|
||||||
if path.endswith(".log"):
|
match_path = normalize_for_match(path)
|
||||||
|
if match_path.endswith(".log"):
|
||||||
return "log_file"
|
return "log_file"
|
||||||
if path.endswith("~"):
|
if match_path.endswith("~"):
|
||||||
return "backup_file"
|
return "backup_file"
|
||||||
if path.startswith("/etc/") and path.endswith("-"):
|
if match_path.startswith("/etc/") and match_path.endswith("-"):
|
||||||
return "backup_file"
|
return "backup_file"
|
||||||
|
|
||||||
if not self.dangerous:
|
if not self.dangerous:
|
||||||
for g in self.deny_globs or []:
|
for g in self.deny_globs or []:
|
||||||
if fnmatch.fnmatch(path, g):
|
if fnmatch.fnmatch(match_path, g):
|
||||||
return "denied_path"
|
return "denied_path"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -306,3 +306,55 @@ def test_secret_scan_reads_whole_file_under_size_cap(tmp_path):
|
||||||
p = tmp_path / "large.conf"
|
p = tmp_path / "large.conf"
|
||||||
p.write_bytes(b"A" * 70_000 + b"\nlate_token = abc123\n")
|
p.write_bytes(b"A" * 70_000 + b"\nlate_token = abc123\n")
|
||||||
assert IgnorePolicy().deny_reason(str(p)) == "sensitive_content"
|
assert IgnorePolicy().deny_reason(str(p)) == "sensitive_content"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_for_match_collapses_noncanonical_paths():
|
||||||
|
from enroll.ignore import normalize_for_match
|
||||||
|
|
||||||
|
assert normalize_for_match("/etc/shadow") == "/etc/shadow"
|
||||||
|
assert normalize_for_match("/etc//shadow") == "/etc/shadow"
|
||||||
|
assert normalize_for_match("/etc/foo/../shadow") == "/etc/shadow"
|
||||||
|
assert normalize_for_match("/etc/./shadow") == "/etc/shadow"
|
||||||
|
assert normalize_for_match("/etc/shadow/") == "/etc/shadow"
|
||||||
|
# A leading "//" is POSIX-significant to normpath but must collapse for
|
||||||
|
# glob matching anchored at "/".
|
||||||
|
assert normalize_for_match("//etc/shadow") == "/etc/shadow"
|
||||||
|
# "///" collapses to "/" via normpath already; ensure we don't mangle it.
|
||||||
|
assert normalize_for_match("///etc/shadow") == "/etc/shadow"
|
||||||
|
# Empty stays empty (no crash).
|
||||||
|
assert normalize_for_match("") == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_deny_reason_denies_noncanonical_sensitive_paths():
|
||||||
|
# Regression: non-canonical spellings of a denied path must still be denied
|
||||||
|
# rather than slipping past the deny glob. Defense-in-depth on top of the
|
||||||
|
# O_NOFOLLOW open in inspect_file(); see normalize_for_match().
|
||||||
|
pol = IgnorePolicy()
|
||||||
|
assert pol._path_deny_reason("/etc//shadow") == "denied_path"
|
||||||
|
assert pol._path_deny_reason("/etc/foo/../shadow") == "denied_path"
|
||||||
|
assert pol._path_deny_reason("/etc/./shadow") == "denied_path"
|
||||||
|
assert pol._path_deny_reason("/etc/ssl/private/../private/key") == "denied_path"
|
||||||
|
assert pol._path_deny_reason("//etc/shadow") == "denied_path"
|
||||||
|
# A normal config path is unaffected.
|
||||||
|
assert pol._path_deny_reason("/etc/nginx/nginx.conf") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_deny_reason_dir_denies_noncanonical_sensitive_paths():
|
||||||
|
pol = IgnorePolicy()
|
||||||
|
# normpath("/etc/ssl/private/../private") -> "/etc/ssl/private" which is the
|
||||||
|
# glob root itself, so use paths that still resolve to a child of it.
|
||||||
|
assert pol.deny_reason_dir("/etc/ssl/private/sub/../child") == "denied_path"
|
||||||
|
assert pol.deny_reason_dir("/etc//ssl/private/sub") == "denied_path"
|
||||||
|
|
||||||
|
|
||||||
|
def test_deny_reason_link_denies_noncanonical_sensitive_paths():
|
||||||
|
pol = IgnorePolicy()
|
||||||
|
assert pol.deny_reason_link("/etc/ssh/../ssh/ssh_host_rsa_key") == "denied_path"
|
||||||
|
assert pol.deny_reason_link("/etc//ssh/ssh_host_ed25519_key") == "denied_path"
|
||||||
|
|
||||||
|
|
||||||
|
def test_noncanonical_backup_and_log_fastpaths():
|
||||||
|
pol = IgnorePolicy()
|
||||||
|
assert pol._path_deny_reason("/var/log/foo/../bar.log") == "log_file"
|
||||||
|
assert pol._path_deny_reason("/etc/foo/../something~") == "backup_file"
|
||||||
|
assert pol._path_deny_reason("/etc//passwd-") == "backup_file"
|
||||||
|
|
|
||||||
Reference in a new issue