From e10a3f62b0a033dec254e7888379ad39911819fc Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 22 Jun 2026 15:06:46 +1000 Subject: [PATCH] Belts and braces: normalise paths before globbing --- enroll/ignore.py | 55 ++++++++++++++++++++++++++++++++++++-------- tests/test_ignore.py | 52 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/enroll/ignore.py b/enroll/ignore.py index 4e64984..dc1532f 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -97,6 +97,34 @@ BLOCK_START = b"/*" BLOCK_END = b"*/" +def normalize_for_match(path: str) -> str: + """Lexically normalize a path string for deny/allow glob matching. + + This collapses redundant separators ("//"), resolves "." and ".." + segments, and strips trailing slashes using ``os.path.normpath`` -- a + pure string operation that never touches the filesystem. + + It is deliberately NOT ``os.path.realpath``/``Path.resolve``: resolving + symlinks would stat the filesystem and reintroduce a time-of-check / + time-of-use window before the later ``O_NOFOLLOW`` open in + ``inspect_file``. The goal here is only to stop a non-canonical *string* + (e.g. "/etc//shadow" or "/etc/foo/../shadow") from slipping past a deny + glob like "/etc/shadow". It is defense-in-depth on top of the no-follow + open, not a load-bearing control by itself. + + ``normpath`` preserves a leading "//" because POSIX treats it as + implementation-defined; for glob matching we collapse it to a single + leading slash so patterns anchored at "/" still match. + """ + + if not path: + return path + normalized = os.path.normpath(path) + if normalized.startswith("//") and not normalized.startswith("///"): + normalized = normalized[1:] + return normalized + + @dataclass(frozen=True) class FileInspection: """Bytes and metadata captured from one safely-opened source file.""" @@ -145,26 +173,31 @@ class IgnorePolicy: yield raw def _path_deny_reason(self, path: str) -> Optional[str]: + # Match against a lexically-normalized path so non-canonical spellings + # (e.g. "/etc//shadow", "/etc/foo/../shadow") cannot slip past a deny + # glob. The original path is still what gets opened/recorded. + match_path = normalize_for_match(path) # Always ignore plain *.log files (rarely useful as config, often noisy). - if path.endswith(".log"): + if match_path.endswith(".log"): return "log_file" # Ignore editor/backup files that end with a trailing tilde. - if path.endswith("~"): + if match_path.endswith("~"): return "backup_file" # Ignore backup shadow files - if path.startswith("/etc/") and path.endswith("-"): + if match_path.startswith("/etc/") and match_path.endswith("-"): return "backup_file" if not self.dangerous: for g in self.deny_globs or []: - if fnmatch.fnmatch(path, g): + if fnmatch.fnmatch(match_path, g): return "denied_path" return None def _content_deny_reason(self, path: str, data: bytes) -> Optional[str]: if b"\x00" in data: + match_path = normalize_for_match(path) for g in self.allow_binary_globs or []: - if fnmatch.fnmatch(path, g): + if fnmatch.fnmatch(match_path, g): # Binary is acceptable for explicitly-allowed paths. return None return "binary_like" @@ -251,8 +284,9 @@ class IgnorePolicy: No size checks or content scanning are performed for directories. """ if not self.dangerous: + match_path = normalize_for_match(path) for g in self.deny_globs or []: - if fnmatch.fnmatch(path, g): + if fnmatch.fnmatch(match_path, g): return "denied_path" try: @@ -283,16 +317,17 @@ class IgnorePolicy: """ # Keep the same fast-path filename ignores as deny_reason(). - if path.endswith(".log"): + match_path = normalize_for_match(path) + if match_path.endswith(".log"): return "log_file" - if path.endswith("~"): + if match_path.endswith("~"): return "backup_file" - if path.startswith("/etc/") and path.endswith("-"): + if match_path.startswith("/etc/") and match_path.endswith("-"): return "backup_file" if not self.dangerous: for g in self.deny_globs or []: - if fnmatch.fnmatch(path, g): + if fnmatch.fnmatch(match_path, g): return "denied_path" try: diff --git a/tests/test_ignore.py b/tests/test_ignore.py index 9c3fac8..227bf65 100644 --- a/tests/test_ignore.py +++ b/tests/test_ignore.py @@ -306,3 +306,55 @@ def test_secret_scan_reads_whole_file_under_size_cap(tmp_path): p = tmp_path / "large.conf" p.write_bytes(b"A" * 70_000 + b"\nlate_token = abc123\n") assert IgnorePolicy().deny_reason(str(p)) == "sensitive_content" + + +def test_normalize_for_match_collapses_noncanonical_paths(): + from enroll.ignore import normalize_for_match + + assert normalize_for_match("/etc/shadow") == "/etc/shadow" + assert normalize_for_match("/etc//shadow") == "/etc/shadow" + assert normalize_for_match("/etc/foo/../shadow") == "/etc/shadow" + assert normalize_for_match("/etc/./shadow") == "/etc/shadow" + assert normalize_for_match("/etc/shadow/") == "/etc/shadow" + # A leading "//" is POSIX-significant to normpath but must collapse for + # glob matching anchored at "/". + assert normalize_for_match("//etc/shadow") == "/etc/shadow" + # "///" collapses to "/" via normpath already; ensure we don't mangle it. + assert normalize_for_match("///etc/shadow") == "/etc/shadow" + # Empty stays empty (no crash). + assert normalize_for_match("") == "" + + +def test_deny_reason_denies_noncanonical_sensitive_paths(): + # Regression: non-canonical spellings of a denied path must still be denied + # rather than slipping past the deny glob. Defense-in-depth on top of the + # O_NOFOLLOW open in inspect_file(); see normalize_for_match(). + pol = IgnorePolicy() + assert pol._path_deny_reason("/etc//shadow") == "denied_path" + assert pol._path_deny_reason("/etc/foo/../shadow") == "denied_path" + assert pol._path_deny_reason("/etc/./shadow") == "denied_path" + assert pol._path_deny_reason("/etc/ssl/private/../private/key") == "denied_path" + assert pol._path_deny_reason("//etc/shadow") == "denied_path" + # A normal config path is unaffected. + assert pol._path_deny_reason("/etc/nginx/nginx.conf") is None + + +def test_deny_reason_dir_denies_noncanonical_sensitive_paths(): + pol = IgnorePolicy() + # normpath("/etc/ssl/private/../private") -> "/etc/ssl/private" which is the + # glob root itself, so use paths that still resolve to a child of it. + assert pol.deny_reason_dir("/etc/ssl/private/sub/../child") == "denied_path" + assert pol.deny_reason_dir("/etc//ssl/private/sub") == "denied_path" + + +def test_deny_reason_link_denies_noncanonical_sensitive_paths(): + pol = IgnorePolicy() + assert pol.deny_reason_link("/etc/ssh/../ssh/ssh_host_rsa_key") == "denied_path" + assert pol.deny_reason_link("/etc//ssh/ssh_host_ed25519_key") == "denied_path" + + +def test_noncanonical_backup_and_log_fastpaths(): + pol = IgnorePolicy() + assert pol._path_deny_reason("/var/log/foo/../bar.log") == "log_file" + assert pol._path_deny_reason("/etc/foo/../something~") == "backup_file" + assert pol._path_deny_reason("/etc//passwd-") == "backup_file"