Belts and braces: normalise paths before globbing
This commit is contained in:
parent
c4448226c0
commit
e10a3f62b0
2 changed files with 97 additions and 10 deletions
|
|
@ -97,6 +97,34 @@ BLOCK_START = b"/*"
|
|||
BLOCK_END = b"*/"
|
||||
|
||||
|
||||
def normalize_for_match(path: str) -> str:
|
||||
"""Lexically normalize a path string for deny/allow glob matching.
|
||||
|
||||
This collapses redundant separators ("//"), resolves "." and ".."
|
||||
segments, and strips trailing slashes using ``os.path.normpath`` -- a
|
||||
pure string operation that never touches the filesystem.
|
||||
|
||||
It is deliberately NOT ``os.path.realpath``/``Path.resolve``: resolving
|
||||
symlinks would stat the filesystem and reintroduce a time-of-check /
|
||||
time-of-use window before the later ``O_NOFOLLOW`` open in
|
||||
``inspect_file``. The goal here is only to stop a non-canonical *string*
|
||||
(e.g. "/etc//shadow" or "/etc/foo/../shadow") from slipping past a deny
|
||||
glob like "/etc/shadow". It is defense-in-depth on top of the no-follow
|
||||
open, not a load-bearing control by itself.
|
||||
|
||||
``normpath`` preserves a leading "//" because POSIX treats it as
|
||||
implementation-defined; for glob matching we collapse it to a single
|
||||
leading slash so patterns anchored at "/" still match.
|
||||
"""
|
||||
|
||||
if not path:
|
||||
return path
|
||||
normalized = os.path.normpath(path)
|
||||
if normalized.startswith("//") and not normalized.startswith("///"):
|
||||
normalized = normalized[1:]
|
||||
return normalized
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FileInspection:
|
||||
"""Bytes and metadata captured from one safely-opened source file."""
|
||||
|
|
@ -145,26 +173,31 @@ class IgnorePolicy:
|
|||
yield raw
|
||||
|
||||
def _path_deny_reason(self, path: str) -> Optional[str]:
|
||||
# Match against a lexically-normalized path so non-canonical spellings
|
||||
# (e.g. "/etc//shadow", "/etc/foo/../shadow") cannot slip past a deny
|
||||
# glob. The original path is still what gets opened/recorded.
|
||||
match_path = normalize_for_match(path)
|
||||
# Always ignore plain *.log files (rarely useful as config, often noisy).
|
||||
if path.endswith(".log"):
|
||||
if match_path.endswith(".log"):
|
||||
return "log_file"
|
||||
# Ignore editor/backup files that end with a trailing tilde.
|
||||
if path.endswith("~"):
|
||||
if match_path.endswith("~"):
|
||||
return "backup_file"
|
||||
# Ignore backup shadow files
|
||||
if path.startswith("/etc/") and path.endswith("-"):
|
||||
if match_path.startswith("/etc/") and match_path.endswith("-"):
|
||||
return "backup_file"
|
||||
|
||||
if not self.dangerous:
|
||||
for g in self.deny_globs or []:
|
||||
if fnmatch.fnmatch(path, g):
|
||||
if fnmatch.fnmatch(match_path, g):
|
||||
return "denied_path"
|
||||
return None
|
||||
|
||||
def _content_deny_reason(self, path: str, data: bytes) -> Optional[str]:
|
||||
if b"\x00" in data:
|
||||
match_path = normalize_for_match(path)
|
||||
for g in self.allow_binary_globs or []:
|
||||
if fnmatch.fnmatch(path, g):
|
||||
if fnmatch.fnmatch(match_path, g):
|
||||
# Binary is acceptable for explicitly-allowed paths.
|
||||
return None
|
||||
return "binary_like"
|
||||
|
|
@ -251,8 +284,9 @@ class IgnorePolicy:
|
|||
No size checks or content scanning are performed for directories.
|
||||
"""
|
||||
if not self.dangerous:
|
||||
match_path = normalize_for_match(path)
|
||||
for g in self.deny_globs or []:
|
||||
if fnmatch.fnmatch(path, g):
|
||||
if fnmatch.fnmatch(match_path, g):
|
||||
return "denied_path"
|
||||
|
||||
try:
|
||||
|
|
@ -283,16 +317,17 @@ class IgnorePolicy:
|
|||
"""
|
||||
|
||||
# Keep the same fast-path filename ignores as deny_reason().
|
||||
if path.endswith(".log"):
|
||||
match_path = normalize_for_match(path)
|
||||
if match_path.endswith(".log"):
|
||||
return "log_file"
|
||||
if path.endswith("~"):
|
||||
if match_path.endswith("~"):
|
||||
return "backup_file"
|
||||
if path.startswith("/etc/") and path.endswith("-"):
|
||||
if match_path.startswith("/etc/") and match_path.endswith("-"):
|
||||
return "backup_file"
|
||||
|
||||
if not self.dangerous:
|
||||
for g in self.deny_globs or []:
|
||||
if fnmatch.fnmatch(path, g):
|
||||
if fnmatch.fnmatch(match_path, g):
|
||||
return "denied_path"
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -306,3 +306,55 @@ def test_secret_scan_reads_whole_file_under_size_cap(tmp_path):
|
|||
p = tmp_path / "large.conf"
|
||||
p.write_bytes(b"A" * 70_000 + b"\nlate_token = abc123\n")
|
||||
assert IgnorePolicy().deny_reason(str(p)) == "sensitive_content"
|
||||
|
||||
|
||||
def test_normalize_for_match_collapses_noncanonical_paths():
|
||||
from enroll.ignore import normalize_for_match
|
||||
|
||||
assert normalize_for_match("/etc/shadow") == "/etc/shadow"
|
||||
assert normalize_for_match("/etc//shadow") == "/etc/shadow"
|
||||
assert normalize_for_match("/etc/foo/../shadow") == "/etc/shadow"
|
||||
assert normalize_for_match("/etc/./shadow") == "/etc/shadow"
|
||||
assert normalize_for_match("/etc/shadow/") == "/etc/shadow"
|
||||
# A leading "//" is POSIX-significant to normpath but must collapse for
|
||||
# glob matching anchored at "/".
|
||||
assert normalize_for_match("//etc/shadow") == "/etc/shadow"
|
||||
# "///" collapses to "/" via normpath already; ensure we don't mangle it.
|
||||
assert normalize_for_match("///etc/shadow") == "/etc/shadow"
|
||||
# Empty stays empty (no crash).
|
||||
assert normalize_for_match("") == ""
|
||||
|
||||
|
||||
def test_deny_reason_denies_noncanonical_sensitive_paths():
|
||||
# Regression: non-canonical spellings of a denied path must still be denied
|
||||
# rather than slipping past the deny glob. Defense-in-depth on top of the
|
||||
# O_NOFOLLOW open in inspect_file(); see normalize_for_match().
|
||||
pol = IgnorePolicy()
|
||||
assert pol._path_deny_reason("/etc//shadow") == "denied_path"
|
||||
assert pol._path_deny_reason("/etc/foo/../shadow") == "denied_path"
|
||||
assert pol._path_deny_reason("/etc/./shadow") == "denied_path"
|
||||
assert pol._path_deny_reason("/etc/ssl/private/../private/key") == "denied_path"
|
||||
assert pol._path_deny_reason("//etc/shadow") == "denied_path"
|
||||
# A normal config path is unaffected.
|
||||
assert pol._path_deny_reason("/etc/nginx/nginx.conf") is None
|
||||
|
||||
|
||||
def test_deny_reason_dir_denies_noncanonical_sensitive_paths():
|
||||
pol = IgnorePolicy()
|
||||
# normpath("/etc/ssl/private/../private") -> "/etc/ssl/private" which is the
|
||||
# glob root itself, so use paths that still resolve to a child of it.
|
||||
assert pol.deny_reason_dir("/etc/ssl/private/sub/../child") == "denied_path"
|
||||
assert pol.deny_reason_dir("/etc//ssl/private/sub") == "denied_path"
|
||||
|
||||
|
||||
def test_deny_reason_link_denies_noncanonical_sensitive_paths():
|
||||
pol = IgnorePolicy()
|
||||
assert pol.deny_reason_link("/etc/ssh/../ssh/ssh_host_rsa_key") == "denied_path"
|
||||
assert pol.deny_reason_link("/etc//ssh/ssh_host_ed25519_key") == "denied_path"
|
||||
|
||||
|
||||
def test_noncanonical_backup_and_log_fastpaths():
|
||||
pol = IgnorePolicy()
|
||||
assert pol._path_deny_reason("/var/log/foo/../bar.log") == "log_file"
|
||||
assert pol._path_deny_reason("/etc/foo/../something~") == "backup_file"
|
||||
assert pol._path_deny_reason("/etc//passwd-") == "backup_file"
|
||||
|
|
|
|||
Reference in a new issue