enroll/tests/test_pathfilter.py
2026-05-31 16:50:57 +10:00

340 lines
11 KiB
Python

from __future__ import annotations
import os
from pathlib import Path
import enroll.pathfilter as pf
def test_compile_and_match_prefix_glob_and_regex(tmp_path: Path):
    """Exercise each pattern kind returned by ``compile_path_pattern``.

    Covers bare prefixes, heuristic and explicit globs, valid and invalid
    regexes, and a ``PathFilter`` whose exclude list overlaps its include
    list. ``tmp_path`` is requested but not used by the body.
    """
    # prefix semantics: matches the exact path and subtree
    prefix = pf.compile_path_pattern("/etc/nginx")
    assert prefix.kind == "prefix"
    assert prefix.matches("/etc/nginx")
    assert prefix.matches("/etc/nginx/nginx.conf")
    assert not prefix.matches("/etc/nginx2/nginx.conf")

    # glob semantics (detected from the glob characters in the pattern)
    heuristic_glob = pf.compile_path_pattern("/etc/**/*.conf")
    assert heuristic_glob.kind == "glob"
    assert heuristic_glob.matches("/etc/nginx/nginx.conf")
    assert not heuristic_glob.matches("/var/etc/nginx.conf")

    # explicit glob via the "glob:" tag
    explicit_glob = pf.compile_path_pattern("glob:/home/*/.bashrc")
    assert explicit_glob.kind == "glob"
    assert explicit_glob.matches("/home/alice/.bashrc")

    # regex semantics (search, not match)
    regex = pf.compile_path_pattern(r"re:/home/[^/]+/\.ssh/authorized_keys$")
    assert regex.kind == "regex"
    assert regex.matches("/home/alice/.ssh/authorized_keys")
    assert not regex.matches("/home/alice/.ssh/authorized_keys2")

    # invalid regex: never matches
    bad = pf.compile_path_pattern("re:[")
    assert bad.kind == "regex"
    assert not bad.matches("/etc/passwd")

    # exclude wins; the local is deliberately NOT called ``pf`` so that it
    # does not shadow the module alias imported at the top of the file.
    path_filter = pf.PathFilter(
        exclude=["/etc/nginx"], include=["/etc/nginx/nginx.conf"]
    )
    assert path_filter.is_excluded("/etc/nginx/nginx.conf")
def test_expand_includes_respects_exclude_symlinks_and_caps(tmp_path: Path):
    """Expansion honours the exclude filter, ignores symlinks and stops at ``max_files``."""
    root = tmp_path / "root"
    dir_a = root / "a"
    dir_b = root / "b"

    dir_a.mkdir(parents=True)
    (dir_a / "one.txt").write_text("1", encoding="utf-8")
    (dir_a / "two.txt").write_text("2", encoding="utf-8")
    dir_b.mkdir()
    (dir_b / "secret.txt").write_text("s", encoding="utf-8")

    # A symlink pointing at a regular file; the test expects it to be ignored.
    os.symlink(str(dir_a / "one.txt"), str(dir_a / "link.txt"))

    excluded = pf.PathFilter(exclude=[str(dir_b)])
    include_patterns = [
        pf.compile_path_pattern(str(dir_a)),
        pf.compile_path_pattern("glob:" + str(root / "**" / "*.txt")),
    ]
    paths, notes = pf.expand_includes(
        include_patterns, exclude=excluded, max_files=2
    )

    # The cap limits the result to two files and is mentioned in the notes.
    assert len(paths) == 2
    assert any("cap" in note.lower() for note in notes)

    for found in paths:
        # Nothing from the excluded directory, and never the symlink.
        assert "/b/" not in found
        assert not found.endswith("link.txt")
def test_expand_includes_notes_on_no_matches(tmp_path: Path):
    """A prefix that does not exist yields no paths and a "matched no files" note."""
    missing = tmp_path / "does_not_exist"
    patterns = [pf.compile_path_pattern(str(missing))]

    paths, notes = pf.expand_includes(patterns, max_files=10)

    assert paths == []
    lowered = [note.lower() for note in notes]
    assert any("matched no files" in text for text in lowered)
def test_expand_includes_supports_regex_with_inferred_root(tmp_path: Path):
    """A regex include is expanded by walking a root inferred from its literal prefix."""
    root = tmp_path / "root"
    app_dir = root / "home" / "alice" / ".config" / "myapp"
    app_dir.mkdir(parents=True)
    target = app_dir / "settings.ini"
    target.write_text("x=1\n", encoding="utf-8")

    # The regex is anchored and starts with an absolute path, so the walk is
    # expected to begin at a narrow inferred root rather than at '/'.
    rex = rf"re:^{root}/home/[^/]+/\.config/myapp/.*$"
    compiled = pf.compile_path_pattern(rex)

    paths, notes = pf.expand_includes([compiled], max_files=10)

    assert str(target) in paths
    assert notes == []
def test_compile_path_pattern_normalises_relative_prefix():
    """A relative prefix compiles to a ``prefix`` pattern with an absolute value."""
    compiled = pf.compile_path_pattern("etc/ssh")
    assert (compiled.kind, compiled.value) == ("prefix", "/etc/ssh")
def test_norm_abs_empty_string_is_root():
    """``_norm_abs`` turns the empty string into the root path."""
    normalised = pf._norm_abs("")
    assert normalised == "/"
def test_posix_match_invalid_pattern_fails_closed(monkeypatch):
    """``_posix_match`` returns ``False`` when ``PurePosixPath.match`` raises."""

    def boom(self, pat):
        # Stand-in for PurePosixPath.match that always fails, to drive the
        # exception handler inside _posix_match.
        raise ValueError("bad pattern")

    # Scope the patch with monkeypatch.context() so the real ``match`` is
    # restored as soon as the check finishes (even if the assertion fails),
    # instead of re-patching by hand in a try/finally.
    with monkeypatch.context() as scoped:
        scoped.setattr(pf.PurePosixPath, "match", boom)
        assert pf._posix_match("/etc/hosts", "[bad") is False
def test_regex_literal_prefix_handles_escapes():
    """An escaped literal is kept (unescaped) in the extracted prefix."""
    pattern = r"^/etc/\./foo"
    extracted = pf._regex_literal_prefix(pattern)
    assert extracted == "/etc/./foo"
def test_expand_includes_maybe_add_file_skips_non_files(monkeypatch, tmp_path: Path):
    """With every path-type probe reporting ``False``, nothing is collected."""
    patterns = [pf.compile_path_pattern(str(tmp_path / "missing"))]

    # Make the candidate look like neither a file, a symlink nor a directory.
    for probe in ("isfile", "islink", "isdir"):
        monkeypatch.setattr(pf.os.path, probe, lambda p: False)

    paths, notes = pf.expand_includes(patterns, max_files=10)

    assert paths == []
    assert any("matched no files" in note for note in notes)
def test_expand_includes_prunes_excluded_dirs(monkeypatch):
    """Nothing below an excluded directory is returned from a simulated walk."""
    include = [pf.compile_path_pattern("/root/**")]
    exclude = pf.PathFilter(exclude=["/root/skip/**"])

    known_dirs = {"/root", "/root/keep", "/root/skip"}

    def fake_walk(root, followlinks=False):
        # Simulated tree: /root lists 'skip' and 'keep' as sub-directories.
        # Only /root/keep is yielded afterwards; /root/skip is never walked
        # by this fake regardless of what the caller does with the dir list.
        assert root == "/root"
        yield ("/root", ["skip", "keep"], [])
        yield ("/root/keep", [], ["a.txt"])

    monkeypatch.setattr(pf.os.path, "isdir", lambda p: p in known_dirs)
    monkeypatch.setattr(pf.os.path, "islink", lambda p: False)
    monkeypatch.setattr(pf.os.path, "isfile", lambda p: True)
    monkeypatch.setattr(pf.os, "walk", fake_walk)

    paths, _notes = pf.expand_includes(include, exclude=exclude, max_files=10)

    assert "/root/keep/a.txt" in paths
    for found in paths:
        assert not found.startswith("/root/skip")
def test_expand_includes_respects_max_files(monkeypatch):
    """Only the first ``max_files`` entries of a simulated walk are returned."""
    include = [pf.compile_path_pattern("/root/**")]

    def fake_walk(root, followlinks=False):
        # A single directory holding three files and no sub-directories.
        return [("/root", [], ["a", "b", "c"])]

    monkeypatch.setattr(pf.os.path, "isdir", lambda p: p == "/root")
    monkeypatch.setattr(pf.os.path, "islink", lambda p: False)
    monkeypatch.setattr(pf.os.path, "isfile", lambda p: True)
    monkeypatch.setattr(pf.os, "walk", fake_walk)

    paths, _notes = pf.expand_includes(include, max_files=2)

    assert len(paths) == 2
    assert "/root/c" not in paths
def test_has_glob_chars():
    """``_has_glob_chars`` flags ``*``, ``?`` and ``[`` patterns and nothing else here."""
    expectations = [
        ("*.txt", True),
        ("file?.log", True),
        ("[abc]", True),
        ("file.txt", False),
        ("", False),
    ]
    for text, expected in expectations:
        assert pf._has_glob_chars(text) is expected
def test_compile_path_pattern_regex_valid():
    """A valid ``re:`` pattern compiles to a usable regex object."""
    compiled = pf.compile_path_pattern("re:^/home/.*$")
    assert compiled.kind == "regex"

    rx = compiled.regex
    assert rx is not None
    assert rx.search("/home/user/file.txt") is not None
    assert rx.search("/var/file.txt") is None
def test_compile_path_pattern_glob_forced():
    """The ``glob:`` tag forces glob kind and is stripped from the stored value."""
    compiled = pf.compile_path_pattern("glob:/etc/*.conf")
    assert (compiled.kind, compiled.value) == ("glob", "/etc/*.conf")
def test_compile_path_pattern_glob_heuristic():
    """An untagged pattern containing ``*`` is classified as a glob."""
    kind = pf.compile_path_pattern("/etc/*.conf").kind
    assert kind == "glob"
def test_compile_path_pattern_prefix():
    """A plain absolute path compiles to a ``prefix`` pattern with the same value."""
    compiled = pf.compile_path_pattern("/etc/nginx")
    assert (compiled.kind, compiled.value) == ("prefix", "/etc/nginx")
def test_compiled_pattern_matches_prefix():
    """A prefix pattern matches itself and its subtree, but not a sibling path."""
    compiled = pf.compile_path_pattern("/etc/nginx")
    cases = [
        ("/etc/nginx", True),
        ("/etc/nginx/conf.d", True),
        ("/etc/ssh", False),
    ]
    for candidate, expected in cases:
        assert compiled.matches(candidate) is expected
def test_compiled_pattern_matches_glob():
    """A single ``*`` matches within one path segment only."""
    compiled = pf.compile_path_pattern("/etc/*.conf")
    cases = [
        ("/etc/ssh.conf", True),
        ("/etc/ssh/sshd.conf", False),
    ]
    for candidate, expected in cases:
        assert compiled.matches(candidate) is expected
def test_compiled_pattern_matches_regex():
    """An anchored regex matches only ``.bashrc`` directly under a home directory."""
    # The dot is escaped (raw string) so it stands for a literal '.', not
    # "any character"; otherwise names such as 'xbashrc' would also match.
    pat = pf.compile_path_pattern(r"re:^/home/[^/]+/\.bashrc$")
    assert pat.matches("/home/alice/.bashrc") is True
    assert pat.matches("/home/bob/.bashrc") is True
    assert pat.matches("/home/alice/.profile") is False
    assert pat.matches("/var/.bashrc") is False
    # Pins the escaped dot: a different leading character must not match.
    assert pat.matches("/home/alice/xbashrc") is False
def test_path_filter_is_excluded():
    """``is_excluded`` applies both glob and prefix entries of the exclude list."""
    path_filter = pf.PathFilter(exclude=["/tmp/*", "/var/log"])
    cases = [
        ("/tmp/file.txt", True),
        ("/var/log/syslog", True),
        ("/etc/ssh", False),
    ]
    for candidate, expected in cases:
        assert path_filter.is_excluded(candidate) is expected
def test_path_filter_empty():
    """A default-constructed filter excludes nothing and has no include patterns."""
    empty_filter = pf.PathFilter()
    excluded = empty_filter.is_excluded("/anything")
    assert excluded is False
    assert empty_filter.iter_include_patterns() == []
def test_expand_includes_prefix_existing(tmp_path: Path):
    """Both files in an existing directory prefix are returned, with no notes."""
    etc_dir = tmp_path / "etc"
    etc_dir.mkdir()
    for name, content in (("file1.txt", "a"), ("file2.txt", "b")):
        (etc_dir / name).write_text(content)

    compiled = pf.compile_path_pattern(str(etc_dir))
    paths, notes = pf.expand_includes([compiled], max_files=10)

    assert len(paths) == 2
    assert notes == []
def test_expand_includes_prefix_nonexistent():
    """A prefix with nothing behind it yields no paths and exactly one note."""
    # Relies on /nonexistent/path being absent from the real filesystem.
    compiled = pf.compile_path_pattern("/nonexistent/path")
    paths, notes = pf.expand_includes([compiled], max_files=10)

    assert paths == []
    assert len(notes) == 1
    only_note = notes[0]
    assert "matched no files" in only_note
def test_expand_includes_glob_no_matches():
    """A glob that matches nothing yields no paths and exactly one note."""
    # Relies on /nonexistent being absent from the real filesystem.
    compiled = pf.compile_path_pattern("/nonexistent/*.txt")
    paths, notes = pf.expand_includes([compiled], max_files=10)

    assert paths == []
    assert len(notes) == 1
def test_expand_includes_skips_symlinks(tmp_path: Path):
    """Only the regular file is returned when a symlink to it sits beside it."""
    real_file = tmp_path / "real.txt"
    real_file.write_text("x")
    os.symlink(str(real_file), str(tmp_path / "link.txt"))

    compiled = pf.compile_path_pattern(str(tmp_path))
    paths, _notes = pf.expand_includes([compiled], max_files=10)

    assert len(paths) == 1
    (only_path,) = paths
    assert only_path.endswith("real.txt")
def test_expand_includes_excludes_pattern(tmp_path: Path):
    """A file matching the exclude glob is dropped from the expansion."""
    etc_dir = tmp_path / "etc"
    etc_dir.mkdir()
    for name, content in (("include.txt", "a"), ("exclude.txt", "b")):
        (etc_dir / name).write_text(content)

    compiled = pf.compile_path_pattern(str(etc_dir))
    excluded = pf.PathFilter(exclude=["*exclude*"])
    paths, _notes = pf.expand_includes([compiled], exclude=excluded, max_files=10)

    assert len(paths) == 1
    (only_path,) = paths
    assert only_path.endswith("include.txt")
def test_expand_includes_skips_directories(tmp_path: Path):
    """Expanding an empty sub-directory returns no paths."""
    subdir = tmp_path / "subdir"
    subdir.mkdir()
    # A sibling file outside the included directory; it must not be picked up.
    (tmp_path / "file.txt").write_text("x")

    compiled = pf.compile_path_pattern(str(subdir))
    paths, _notes = pf.expand_includes([compiled], max_files=10)

    assert paths == []
def test_regex_literal_prefix_simple():
    """A pattern made only of literal characters is returned unchanged."""
    literal = "/etc/nginx/"
    assert pf._regex_literal_prefix(literal) == literal
def test_regex_literal_prefix_with_anchor():
    """A leading ``^`` anchor is not part of the extracted prefix."""
    extracted = pf._regex_literal_prefix("^/etc/nginx/")
    assert extracted == "/etc/nginx/"
def test_regex_literal_prefix_with_regex_chars():
    """The prefix ends at the first regex metacharacter."""
    extracted = pf._regex_literal_prefix("^/etc/.*\\.conf$")
    assert extracted == "/etc/"
def test_path_filter_with_include_patterns():
    """``iter_include_patterns`` returns one compiled glob for the single include entry."""
    path_filter = pf.PathFilter(
        include=["/etc/*.conf"], exclude=["/etc/secret.conf"]
    )
    compiled = path_filter.iter_include_patterns()

    assert len(compiled) == 1
    first = compiled[0]
    assert first.kind == "glob"