Ensure that --include-path records (but does not traverse) symlinks
This commit is contained in:
parent 07b07e60c5
commit 952687e15d
2 changed files with 95 additions and 8 deletions
|
|
@ -5,12 +5,13 @@ import os
|
||||||
from typing import Dict, List, Optional, Set
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
from .. import harvest as h
|
from .. import harvest as h
|
||||||
from ..capture import capture_file
|
from ..capture import capture_file, capture_link
|
||||||
from ..harvest_types import (
|
from ..harvest_types import (
|
||||||
ExcludedFile,
|
ExcludedFile,
|
||||||
ExtraPathsSnapshot,
|
ExtraPathsSnapshot,
|
||||||
ManagedDir,
|
ManagedDir,
|
||||||
ManagedFile,
|
ManagedFile,
|
||||||
|
ManagedLink,
|
||||||
UsrLocalCustomSnapshot,
|
UsrLocalCustomSnapshot,
|
||||||
)
|
)
|
||||||
from ..system_paths import MAX_FILES_CAP
|
from ..system_paths import MAX_FILES_CAP
|
||||||
|
|
@ -132,6 +133,7 @@ class ExtraPathsCollector(HarvestCollector):
|
||||||
self.notes: List[str] = []
|
self.notes: List[str] = []
|
||||||
self.excluded: List[ExcludedFile] = []
|
self.excluded: List[ExcludedFile] = []
|
||||||
self.managed: List[ManagedFile] = []
|
self.managed: List[ManagedFile] = []
|
||||||
|
self.managed_links: List[ManagedLink] = []
|
||||||
self.managed_dirs: List[ManagedDir] = []
|
self.managed_dirs: List[ManagedDir] = []
|
||||||
self.dir_seen: Set[str] = set()
|
self.dir_seen: Set[str] = set()
|
||||||
|
|
||||||
|
|
@ -178,28 +180,53 @@ class ExtraPathsCollector(HarvestCollector):
|
||||||
exclude_patterns=self.exclude_specs,
|
exclude_patterns=self.exclude_specs,
|
||||||
managed_dirs=self.managed_dirs,
|
managed_dirs=self.managed_dirs,
|
||||||
managed_files=self.managed,
|
managed_files=self.managed,
|
||||||
|
managed_links=self.managed_links,
|
||||||
excluded=self.excluded,
|
excluded=self.excluded,
|
||||||
notes=self.notes,
|
notes=self.notes,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _collect_included_dirs(self) -> None:
|
def _collect_included_dirs(self) -> None:
|
||||||
|
role_seen = self.seen_by_role.setdefault(self.role_name, set())
|
||||||
for pat in self.context.path_filter.iter_include_patterns():
|
for pat in self.context.path_filter.iter_include_patterns():
|
||||||
if pat.kind == "prefix":
|
if pat.kind == "prefix":
|
||||||
path = pat.value
|
path = pat.value
|
||||||
if os.path.isdir(path) and not os.path.islink(path):
|
if os.path.islink(path):
|
||||||
self._walk_and_capture_dirs(path)
|
self._capture_included_link(path, role_seen)
|
||||||
|
elif os.path.isdir(path):
|
||||||
|
self._walk_and_capture_dirs(path, role_seen)
|
||||||
elif pat.kind == "glob":
|
elif pat.kind == "glob":
|
||||||
for hit in glob.glob(pat.value, recursive=True):
|
for hit in glob.glob(pat.value, recursive=True):
|
||||||
if os.path.isdir(hit) and not os.path.islink(hit):
|
if os.path.islink(hit):
|
||||||
self._walk_and_capture_dirs(hit)
|
self._capture_included_link(hit, role_seen)
|
||||||
|
elif os.path.isdir(hit):
|
||||||
|
self._walk_and_capture_dirs(hit, role_seen)
|
||||||
|
|
||||||
def _walk_and_capture_dirs(self, root: str) -> None:
|
def _capture_included_link(self, path: str, role_seen: Set[str]) -> None:
|
||||||
|
path = os.path.normpath(path)
|
||||||
|
if not path.startswith("/"):
|
||||||
|
path = "/" + path
|
||||||
|
if path in self.already_all:
|
||||||
|
return
|
||||||
|
if capture_link(
|
||||||
|
role_name=self.role_name,
|
||||||
|
abs_path=path,
|
||||||
|
reason="user_include_link",
|
||||||
|
policy=self.context.policy,
|
||||||
|
path_filter=self.context.path_filter,
|
||||||
|
managed_out=self.managed_links,
|
||||||
|
excluded_out=self.excluded,
|
||||||
|
seen_role=role_seen,
|
||||||
|
seen_global=self.context.captured_global,
|
||||||
|
):
|
||||||
|
self.already_all.add(path)
|
||||||
|
|
||||||
|
def _walk_and_capture_dirs(self, root: str, role_seen: Set[str]) -> None:
|
||||||
root = os.path.normpath(root)
|
root = os.path.normpath(root)
|
||||||
if not root.startswith("/"):
|
if not root.startswith("/"):
|
||||||
root = "/" + root
|
root = "/" + root
|
||||||
if not os.path.isdir(root) or os.path.islink(root):
|
if not os.path.isdir(root) or os.path.islink(root):
|
||||||
return
|
return
|
||||||
for dirpath, dirnames, _ in os.walk(root, followlinks=False):
|
for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
|
||||||
if len(self.managed_dirs) >= MAX_FILES_CAP:
|
if len(self.managed_dirs) >= MAX_FILES_CAP:
|
||||||
self.notes.append(
|
self.notes.append(
|
||||||
f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}."
|
f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}."
|
||||||
|
|
@ -243,7 +270,17 @@ class ExtraPathsCollector(HarvestCollector):
|
||||||
pruned: List[str] = []
|
pruned: List[str] = []
|
||||||
for dirname in dirnames:
|
for dirname in dirnames:
|
||||||
path = os.path.join(dirpath, dirname)
|
path = os.path.join(dirpath, dirname)
|
||||||
if os.path.islink(path) or self.context.path_filter.is_excluded(path):
|
if self.context.path_filter.is_excluded(path):
|
||||||
|
continue
|
||||||
|
if os.path.islink(path):
|
||||||
|
self._capture_included_link(path, role_seen)
|
||||||
continue
|
continue
|
||||||
pruned.append(dirname)
|
pruned.append(dirname)
|
||||||
dirnames[:] = pruned
|
dirnames[:] = pruned
|
||||||
|
|
||||||
|
for filename in filenames:
|
||||||
|
path = os.path.join(dirpath, filename)
|
||||||
|
if self.context.path_filter.is_excluded(path):
|
||||||
|
continue
|
||||||
|
if os.path.islink(path):
|
||||||
|
self._capture_included_link(path, role_seen)
|
||||||
|
|
|
||||||
|
|
@ -394,3 +394,53 @@ def test_usr_local_custom_collector_scans_executable_bin_and_notes_cap(
|
||||||
"usr_local_etc_custom",
|
"usr_local_etc_custom",
|
||||||
"usr_local_bin_script",
|
"usr_local_bin_script",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_paths_collector_records_symlinks_without_following(tmp_path):
|
||||||
|
root = tmp_path / "include"
|
||||||
|
root.mkdir()
|
||||||
|
real_file = root / "real.conf"
|
||||||
|
real_file.write_text("ok", encoding="utf-8")
|
||||||
|
(root / "link.conf").symlink_to("real.conf")
|
||||||
|
|
||||||
|
outside = tmp_path / "outside"
|
||||||
|
outside.mkdir()
|
||||||
|
(outside / "outside.conf").write_text("do-not-follow", encoding="utf-8")
|
||||||
|
(root / "shared").symlink_to(outside, target_is_directory=True)
|
||||||
|
|
||||||
|
ctx = _context(tmp_path, include=[str(root)])
|
||||||
|
result = ExtraPathsCollector(
|
||||||
|
ctx,
|
||||||
|
seen_by_role={},
|
||||||
|
already_all=set(),
|
||||||
|
include_paths=[str(root)],
|
||||||
|
).collect()
|
||||||
|
|
||||||
|
links = {(link.path, link.target, link.reason) for link in result.managed_links}
|
||||||
|
assert (str(root / "link.conf"), "real.conf", "user_include_link") in links
|
||||||
|
assert (str(root / "shared"), str(outside), "user_include_link") in links
|
||||||
|
|
||||||
|
managed_files = {mf.path for mf in result.managed_files}
|
||||||
|
assert str(real_file) in managed_files
|
||||||
|
assert str(outside / "outside.conf") not in managed_files
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_paths_collector_records_include_path_that_is_symlink(tmp_path):
|
||||||
|
real_root = tmp_path / "real"
|
||||||
|
real_root.mkdir()
|
||||||
|
(real_root / "inside.conf").write_text("do-not-follow", encoding="utf-8")
|
||||||
|
link_root = tmp_path / "linked-root"
|
||||||
|
link_root.symlink_to(real_root, target_is_directory=True)
|
||||||
|
|
||||||
|
ctx = _context(tmp_path, include=[str(link_root)])
|
||||||
|
result = ExtraPathsCollector(
|
||||||
|
ctx,
|
||||||
|
seen_by_role={},
|
||||||
|
already_all=set(),
|
||||||
|
include_paths=[str(link_root)],
|
||||||
|
).collect()
|
||||||
|
|
||||||
|
assert [(link.path, link.target, link.reason) for link in result.managed_links] == [
|
||||||
|
(str(link_root), str(real_root), "user_include_link")
|
||||||
|
]
|
||||||
|
assert result.managed_files == []
|
||||||
|
|
|
||||||
Reference in a new issue