340 lines
11 KiB
Python
340 lines
11 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import enroll.pathfilter as pf
|
|
|
|
|
|
def test_compile_and_match_prefix_glob_and_regex(tmp_path: Path):
    """Walk through every pattern kind, then check that exclude beats include."""
    from enroll.pathfilter import PathFilter, compile_path_pattern

    # A plain absolute path compiles to a prefix: it covers the path itself
    # and its subtree, but not a sibling sharing the same leading characters.
    prefix_pat = compile_path_pattern("/etc/nginx")
    assert prefix_pat.kind == "prefix"
    assert prefix_pat.matches("/etc/nginx")
    assert prefix_pat.matches("/etc/nginx/nginx.conf")
    assert not prefix_pat.matches("/etc/nginx2/nginx.conf")

    # Wildcard characters switch the pattern to glob semantics.
    inferred_glob = compile_path_pattern("/etc/**/*.conf")
    assert inferred_glob.kind == "glob"
    assert inferred_glob.matches("/etc/nginx/nginx.conf")
    assert not inferred_glob.matches("/var/etc/nginx.conf")

    # The "glob:" prefix requests glob semantics explicitly.
    forced_glob = compile_path_pattern("glob:/home/*/.bashrc")
    assert forced_glob.kind == "glob"
    assert forced_glob.matches("/home/alice/.bashrc")

    # The "re:" prefix requests a regex (searched, not anchored by default).
    regex_pat = compile_path_pattern(r"re:/home/[^/]+/\.ssh/authorized_keys$")
    assert regex_pat.kind == "regex"
    assert regex_pat.matches("/home/alice/.ssh/authorized_keys")
    assert not regex_pat.matches("/home/alice/.ssh/authorized_keys2")

    # A regex that fails to compile is still a regex pattern, but matches nothing.
    broken_regex = compile_path_pattern("re:[")
    assert broken_regex.kind == "regex"
    assert not broken_regex.matches("/etc/passwd")

    # An excluded parent wins even when a file below it is explicitly included.
    path_filter = PathFilter(
        exclude=["/etc/nginx"],
        include=["/etc/nginx/nginx.conf"],
    )
    assert path_filter.is_excluded("/etc/nginx/nginx.conf")
|
|
|
|
|
|
def test_expand_includes_respects_exclude_symlinks_and_caps(tmp_path: Path):
    """Expansion must honour the exclude filter, skip symlinks and stop at the cap."""
    from enroll.pathfilter import PathFilter, compile_path_pattern, expand_includes

    root = tmp_path / "root"
    dir_a = root / "a"
    dir_b = root / "b"

    dir_a.mkdir(parents=True)
    (dir_a / "one.txt").write_text("1", encoding="utf-8")
    (dir_a / "two.txt").write_text("2", encoding="utf-8")
    dir_b.mkdir()
    (dir_b / "secret.txt").write_text("s", encoding="utf-8")

    # A symlink to a regular file; the expansion is expected to ignore it.
    os.symlink(str(dir_a / "one.txt"), str(dir_a / "link.txt"))

    exclude = PathFilter(exclude=[str(dir_b)])

    txt_glob = "glob:" + str(root / "**" / "*.txt")
    include_patterns = [
        compile_path_pattern(str(dir_a)),
        compile_path_pattern(txt_glob),
    ]

    paths, notes = expand_includes(include_patterns, exclude=exclude, max_files=2)

    # The cap limits the result to two files and is reported in the notes.
    assert len(paths) == 2
    assert any("cap" in note.lower() for note in notes)
    # Nothing from the excluded directory may appear.
    assert all("/b/" not in path for path in paths)
    # The symlink must not be part of the result.
    assert all(not path.endswith("link.txt") for path in paths)
|
|
|
|
|
|
def test_expand_includes_notes_on_no_matches(tmp_path: Path):
    """A pattern pointing at a missing path yields no files and a explanatory note."""
    from enroll.pathfilter import compile_path_pattern, expand_includes

    missing = tmp_path / "does_not_exist"
    include_patterns = [compile_path_pattern(str(missing))]

    paths, notes = expand_includes(include_patterns, max_files=10)

    assert paths == []
    assert any("matched no files" in note.lower() for note in notes)
|
|
|
|
|
|
def test_expand_includes_supports_regex_with_inferred_root(tmp_path: Path):
    """Regex includes are expanded by walking an inferred literal prefix root."""
    from enroll.pathfilter import compile_path_pattern, expand_includes

    root = tmp_path / "root"
    app_dir = root / "home" / "alice" / ".config" / "myapp"
    app_dir.mkdir(parents=True)

    target = app_dir / "settings.ini"
    target.write_text("x=1\n", encoding="utf-8")

    # The regex is anchored and starts with an absolute literal path, so
    # expand_includes should derive a narrow walk root rather than scan '/'.
    pattern = compile_path_pattern(rf"re:^{root}/home/[^/]+/\.config/myapp/.*$")

    paths, notes = expand_includes([pattern], max_files=10)

    assert str(target) in paths
    assert notes == []
|
|
|
|
|
|
def test_compile_path_pattern_normalises_relative_prefix():
    """A relative prefix is stored as an absolute path with a leading slash."""
    from enroll.pathfilter import compile_path_pattern

    compiled = compile_path_pattern("etc/ssh")

    assert compiled.kind == "prefix"
    assert compiled.value == "/etc/ssh"
|
|
|
|
|
|
def test_norm_abs_empty_string_is_root():
    """Normalising the empty string gives the filesystem root."""
    normalised = pf._norm_abs("")
    assert normalised == "/"
|
|
|
|
|
|
def test_posix_match_invalid_pattern_fails_closed(monkeypatch):
    """``_posix_match`` must report "no match" when ``PurePosixPath.match`` raises.

    ``PurePosixPath.match`` is replaced with a stub that always raises
    ``ValueError`` so the exception-handling path is exercised.
    """

    def boom(self, pat):
        raise ValueError("bad pattern")

    # monkeypatch.context() undoes the patch as soon as the block exits (also
    # when the assertion fails), so the stdlib class is only altered for the
    # duration of the call under test and no manual restore is needed.
    with monkeypatch.context() as scoped:
        scoped.setattr(pf.PurePosixPath, "match", boom)
        assert pf._posix_match("/etc/hosts", "[bad") is False
|
|
|
|
|
|
def test_regex_literal_prefix_handles_escapes():
    """An escaped character is kept in the literal prefix as its literal form."""
    # The leading anchor is dropped and the escaped dot contributes a plain ".".
    prefix = pf._regex_literal_prefix(r"^/etc/\./foo")
    assert prefix == "/etc/./foo"
|
|
|
|
|
|
def test_expand_includes_maybe_add_file_skips_non_files(monkeypatch, tmp_path: Path):
    """With every path probe answering False, the expansion finds nothing."""
    # Intended to drive the _maybe_add_file branch that rejects symlinks/non-files.
    include_patterns = [pf.compile_path_pattern(str(tmp_path / "missing"))]

    # Make the target look like neither a file, a link nor a directory.
    for probe in ("isfile", "islink", "isdir"):
        monkeypatch.setattr(pf.os.path, probe, lambda p: False)

    paths, notes = pf.expand_includes(include_patterns, max_files=10)

    assert paths == []
    assert any("matched no files" in note for note in notes)
|
|
|
|
|
|
def test_expand_includes_prunes_excluded_dirs(monkeypatch):
    """Files below an excluded directory must not show up in the expansion."""
    include = [pf.compile_path_pattern("/root/**")]
    exclude = pf.PathFilter(exclude=["/root/skip/**"])

    # Fake filesystem: /root contains the directories 'skip' and 'keep';
    # 'skip' is expected to be pruned from the walk.
    known_dirs = {"/root", "/root/keep", "/root/skip"}
    monkeypatch.setattr(pf.os.path, "isdir", lambda p: p in known_dirs)
    monkeypatch.setattr(pf.os.path, "islink", lambda p: False)
    monkeypatch.setattr(pf.os.path, "isfile", lambda p: True)

    def fake_walk(root, followlinks=False):
        assert root == "/root"
        yield ("/root", ["skip", "keep"], [])
        # With pruning in place, /root/skip is never visited, so only
        # /root/keep is produced here.
        yield ("/root/keep", [], ["a.txt"])

    monkeypatch.setattr(pf.os, "walk", fake_walk)

    paths, _notes = pf.expand_includes(include, exclude=exclude, max_files=10)

    assert "/root/keep/a.txt" in paths
    assert not any(path.startswith("/root/skip") for path in paths)
|
|
|
|
|
|
def test_expand_includes_respects_max_files(monkeypatch):
    """Only the first ``max_files`` entries of the walk end up in the result."""
    include = [pf.compile_path_pattern("/root/**")]

    def fake_walk(root, followlinks=False):
        # A single directory holding three files, one more than the cap.
        return [("/root", [], ["a", "b", "c"])]

    monkeypatch.setattr(pf.os.path, "isdir", lambda p: p == "/root")
    monkeypatch.setattr(pf.os.path, "islink", lambda p: False)
    monkeypatch.setattr(pf.os.path, "isfile", lambda p: True)
    monkeypatch.setattr(pf.os, "walk", fake_walk)

    paths, _notes = pf.expand_includes(include, max_files=2)

    assert len(paths) == 2
    assert "/root/c" not in paths
|
|
|
|
|
|
def test_has_glob_chars():
    """``_has_glob_chars`` flags ``*``, ``?`` and ``[...]`` and nothing else here."""
    expectations = [
        ("*.txt", True),
        ("file?.log", True),
        ("[abc]", True),
        ("file.txt", False),
        ("", False),
    ]
    for text, expected in expectations:
        assert pf._has_glob_chars(text) is expected
|
|
|
|
|
|
def test_compile_path_pattern_regex_valid():
    """A valid ``re:`` pattern exposes a compiled regex that can be searched."""
    compiled = pf.compile_path_pattern("re:^/home/.*$")

    assert compiled.kind == "regex"

    rx = compiled.regex
    assert rx is not None
    assert rx.search("/home/user/file.txt") is not None
    assert rx.search("/var/file.txt") is None
|
|
|
|
|
|
def test_compile_path_pattern_glob_forced():
    """The ``glob:`` prefix selects glob kind and is stripped from the value."""
    compiled = pf.compile_path_pattern("glob:/etc/*.conf")

    observed = (compiled.kind, compiled.value)
    assert observed[0] == "glob"
    assert observed[1] == "/etc/*.conf"
|
|
|
|
|
|
def test_compile_path_pattern_glob_heuristic():
    """Without any prefix, a pattern containing ``*`` is treated as a glob."""
    kind = pf.compile_path_pattern("/etc/*.conf").kind
    assert kind == "glob"
|
|
|
|
|
|
def test_compile_path_pattern_prefix():
    """A plain absolute path compiles to a prefix pattern with the same value."""
    raw = "/etc/nginx"
    compiled = pf.compile_path_pattern(raw)

    assert compiled.kind == "prefix"
    assert compiled.value == "/etc/nginx"
|
|
|
|
|
|
def test_compiled_pattern_matches_prefix():
    """A prefix pattern matches itself and its subtree, not unrelated paths."""
    nginx = pf.compile_path_pattern("/etc/nginx")

    for candidate, expected in (
        ("/etc/nginx", True),
        ("/etc/nginx/conf.d", True),
        ("/etc/ssh", False),
    ):
        assert nginx.matches(candidate) is expected
|
|
|
|
|
|
def test_compiled_pattern_matches_glob():
    """A single ``*`` matches within one path segment only."""
    conf_glob = pf.compile_path_pattern("/etc/*.conf")

    # Direct child of /etc: matches.
    assert conf_glob.matches("/etc/ssh.conf") is True
    # One directory level deeper: must not match.
    assert conf_glob.matches("/etc/ssh/sshd.conf") is False
|
|
|
|
|
|
def test_compiled_pattern_matches_regex():
    """An anchored regex pattern matches ``.bashrc`` in any single home directory."""
    # The dot is escaped so it only matches a literal "."; unescaped it would
    # accept any character in that position (e.g. "xbashrc").
    pat = pf.compile_path_pattern(r"re:^/home/[^/]+/\.bashrc$")

    assert pat.matches("/home/alice/.bashrc") is True
    assert pat.matches("/home/bob/.bashrc") is True
    assert pat.matches("/home/alice/.profile") is False
    assert pat.matches("/var/.bashrc") is False
    # Guards the escaping above: a non-dot character must not be accepted.
    assert pat.matches("/home/alice/xbashrc") is False
|
|
|
|
|
|
def test_path_filter_is_excluded():
    """``is_excluded`` applies both glob and prefix exclude patterns."""
    path_filter = pf.PathFilter(exclude=["/tmp/*", "/var/log"])

    # Hit by the glob exclude.
    assert path_filter.is_excluded("/tmp/file.txt") is True
    # Hit by the prefix exclude.
    assert path_filter.is_excluded("/var/log/syslog") is True
    # Hit by neither.
    assert path_filter.is_excluded("/etc/ssh") is False
|
|
|
|
|
|
def test_path_filter_empty():
    """A filter built with no arguments excludes nothing and has no includes."""
    empty_filter = pf.PathFilter()

    include_patterns = empty_filter.iter_include_patterns()

    assert empty_filter.is_excluded("/anything") is False
    assert include_patterns == []
|
|
|
|
|
|
def test_expand_includes_prefix_existing(tmp_path: Path):
    """A prefix pattern for an existing directory yields its files and no notes."""
    etc_dir = tmp_path / "etc"
    etc_dir.mkdir()
    # Explicit encoding keeps the fixture independent of the locale default
    # and consistent with the other tests in this module.
    (etc_dir / "file1.txt").write_text("a", encoding="utf-8")
    (etc_dir / "file2.txt").write_text("b", encoding="utf-8")

    patterns = [pf.compile_path_pattern(str(etc_dir))]
    paths, notes = pf.expand_includes(patterns, max_files=10)

    assert len(paths) == 2
    assert notes == []
|
|
|
|
|
|
def test_expand_includes_prefix_nonexistent():
    """A prefix that does not exist yields no paths and exactly one note."""
    include_patterns = [pf.compile_path_pattern("/nonexistent/path")]

    paths, notes = pf.expand_includes(include_patterns, max_files=10)

    assert paths == []
    assert len(notes) == 1
    (only_note,) = notes
    assert "matched no files" in only_note
|
|
|
|
|
|
def test_expand_includes_glob_no_matches():
    """A glob with nothing to match yields no paths and exactly one note."""
    include_patterns = [pf.compile_path_pattern("/nonexistent/*.txt")]

    paths, notes = pf.expand_includes(include_patterns, max_files=10)

    assert paths == []
    assert len(notes) == 1
|
|
|
|
|
|
def test_expand_includes_skips_symlinks(tmp_path: Path):
    """Only the regular file is returned; the symlink pointing at it is skipped."""
    real_file = tmp_path / "real.txt"
    # Explicit encoding keeps the fixture independent of the locale default
    # and consistent with the other tests in this module.
    real_file.write_text("x", encoding="utf-8")
    link = tmp_path / "link.txt"
    os.symlink(str(real_file), str(link))

    patterns = [pf.compile_path_pattern(str(tmp_path))]
    paths, notes = pf.expand_includes(patterns, max_files=10)

    assert len(paths) == 1
    assert paths[0].endswith("real.txt")
|
|
|
|
|
|
def test_expand_includes_excludes_pattern(tmp_path: Path):
    """Files matching the exclude glob are dropped from the expanded include."""
    etc_dir = tmp_path / "etc"
    etc_dir.mkdir()
    # Explicit encoding keeps the fixture independent of the locale default
    # and consistent with the other tests in this module.
    (etc_dir / "include.txt").write_text("a", encoding="utf-8")
    (etc_dir / "exclude.txt").write_text("b", encoding="utf-8")

    patterns = [pf.compile_path_pattern(str(etc_dir))]
    exclude = pf.PathFilter(exclude=["*exclude*"])
    paths, notes = pf.expand_includes(patterns, exclude=exclude, max_files=10)

    assert len(paths) == 1
    assert paths[0].endswith("include.txt")
|
|
|
|
|
|
def test_expand_includes_skips_directories(tmp_path: Path):
    """Expanding an empty directory yields no paths.

    The sibling ``file.txt`` lives outside the included directory and so must
    not be picked up either.
    """
    subdir = tmp_path / "subdir"
    subdir.mkdir()
    # Explicit encoding keeps the fixture independent of the locale default
    # and consistent with the other tests in this module.
    (tmp_path / "file.txt").write_text("x", encoding="utf-8")

    patterns = [pf.compile_path_pattern(str(subdir))]
    paths, notes = pf.expand_includes(patterns, max_files=10)

    assert paths == []
|
|
|
|
|
|
def test_regex_literal_prefix_simple():
    """A pattern made only of literal characters is its own prefix."""
    literal = "/etc/nginx/"
    assert pf._regex_literal_prefix(literal) == "/etc/nginx/"
|
|
|
|
|
|
def test_regex_literal_prefix_with_anchor():
    """A leading ``^`` anchor is not part of the literal prefix."""
    prefix = pf._regex_literal_prefix("^/etc/nginx/")
    assert prefix == "/etc/nginx/"
|
|
|
|
|
|
def test_regex_literal_prefix_with_regex_chars():
    """The literal prefix ends at the first regex metacharacter."""
    prefix = pf._regex_literal_prefix(r"^/etc/.*\.conf$")
    assert prefix == "/etc/"
|
|
|
|
|
|
def test_path_filter_with_include_patterns():
    """``iter_include_patterns`` returns the compiled include patterns only."""
    path_filter = pf.PathFilter(
        include=["/etc/*.conf"],
        exclude=["/etc/secret.conf"],
    )

    include_patterns = path_filter.iter_include_patterns()

    assert len(include_patterns) == 1
    assert include_patterns[0].kind == "glob"
|