Allow the user to add extra paths to harvest, or
All checks were successful
CI / test (push) Successful in 5m31s
Lint / test (push) Successful in 34s
Trivy / test (push) Successful in 19s

paths to ignore, using `--exclude-path` and
`--include-path` arguments.
This commit is contained in:
Miguel Jacq 2025-12-20 17:47:00 +11:00
parent 25add369dc
commit 240e79706f
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
9 changed files with 687 additions and 12 deletions

View file

@ -1,3 +1,8 @@
# 0.1.3
* Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path`
arguments.
# 0.1.2
* Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or

View file

@ -69,6 +69,7 @@ Harvest state about a host and write a harvest bundle.
- Changed-from-default config (plus related custom/unowned files under service dirs)
- Non-system users + SSH public keys
- Misc `/etc` that can't be attributed to a package (`etc_custom` role)
- Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time)
**Common flags**
- Remote harvesting:
@ -79,6 +80,14 @@ Harvest state about a host and write a harvest bundle.
- `--dangerous`: disables secret-safety checks (see “Sensitive data” below)
- Encrypt bundles at rest:
- `--sops <FINGERPRINT...>`: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory
- Path selection (include/exclude):
- `--include-path <PATTERN>` (repeatable): add extra files/dirs to harvest (even from locations normally ignored, like `/home`). Still subject to secret-safety checks unless `--dangerous`.
- `--exclude-path <PATTERN>` (repeatable): skip files/dirs even if they would normally be harvested.
- Pattern syntax:
- plain path: matches that file; directories match the directory + everything under it
- glob (default): supports `*` and `**` (prefix with `glob:` to force)
- regex: prefix with `re:` or `regex:`
- Precedence: excludes win over includes.
---
@ -227,6 +236,23 @@ enroll harvest --out /tmp/enroll-harvest
enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest
```
### Include paths (`--include-path`)
```bash
# Add a few dotfiles from /home (still secret-safe unless --dangerous)
enroll harvest --out /tmp/enroll-harvest --include-path '/home/*/.bashrc' --include-path '/home/*/.profile'
```
### Exclude paths (`--exclude-path`)
```bash
# Skip specific /usr/local/bin entries (or patterns)
enroll harvest --out /tmp/enroll-harvest --exclude-path '/usr/local/bin/docker-*' --exclude-path '/usr/local/bin/some-tool'
```
### Regex include
```bash
enroll harvest --out /tmp/enroll-harvest --include-path 're:^/home/[^/]+/\.config/myapp/.*$'
```
### `--dangerous`
```bash
enroll harvest --out /tmp/enroll-harvest --dangerous

View file

@ -125,6 +125,27 @@ def main() -> None:
action="store_true",
help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.",
)
h.add_argument(
"--include-path",
action="append",
default=[],
metavar="PATTERN",
help=(
"Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
"Included files are still filtered by IgnorePolicy unless --dangerous is used."
),
)
h.add_argument(
"--exclude-path",
action="append",
default=[],
metavar="PATTERN",
help=(
"Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
"Excludes apply to all harvesting, including defaults."
),
)
h.add_argument(
"--sops",
nargs="+",
@ -186,6 +207,27 @@ def main() -> None:
action="store_true",
help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.",
)
s.add_argument(
"--include-path",
action="append",
default=[],
metavar="PATTERN",
help=(
"Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
"Included files are still filtered by IgnorePolicy unless --dangerous is used."
),
)
s.add_argument(
"--exclude-path",
action="append",
default=[],
metavar="PATTERN",
help=(
"Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
"Excludes apply to all harvesting, including defaults."
),
)
s.add_argument(
"--sops",
nargs="+",
@ -320,6 +362,8 @@ def main() -> None:
remote_user=args.remote_user,
dangerous=bool(args.dangerous),
no_sudo=bool(args.no_sudo),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
_encrypt_harvest_dir_to_sops(
tmp_bundle, out_file, list(sops_fps)
@ -338,6 +382,8 @@ def main() -> None:
remote_user=args.remote_user,
dangerous=bool(args.dangerous),
no_sudo=bool(args.no_sudo),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
print(str(state))
else:
@ -350,7 +396,12 @@ def main() -> None:
os.chmod(tmp_bundle, 0o700)
except OSError:
pass
harvest(str(tmp_bundle), dangerous=bool(args.dangerous))
harvest(
str(tmp_bundle),
dangerous=bool(args.dangerous),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
_encrypt_harvest_dir_to_sops(
tmp_bundle, out_file, list(sops_fps)
)
@ -360,7 +411,12 @@ def main() -> None:
raise SystemExit(
"error: --out is required unless --remote-host is set"
)
path = harvest(args.out, dangerous=bool(args.dangerous))
path = harvest(
args.out,
dangerous=bool(args.dangerous),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
print(path)
elif args.cmd == "manifest":
out_enc = manifest(
@ -446,6 +502,8 @@ def main() -> None:
remote_user=args.remote_user,
dangerous=bool(args.dangerous),
no_sudo=bool(args.no_sudo),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
_encrypt_harvest_dir_to_sops(
tmp_bundle, out_file, list(sops_fps)
@ -473,6 +531,8 @@ def main() -> None:
remote_user=args.remote_user,
dangerous=bool(args.dangerous),
no_sudo=bool(args.no_sudo),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
manifest(
str(harvest_dir),
@ -493,7 +553,12 @@ def main() -> None:
os.chmod(tmp_bundle, 0o700)
except OSError:
pass
harvest(str(tmp_bundle), dangerous=bool(args.dangerous))
harvest(
str(tmp_bundle),
dangerous=bool(args.dangerous),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
_encrypt_harvest_dir_to_sops(
tmp_bundle, out_file, list(sops_fps)
)
@ -512,7 +577,12 @@ def main() -> None:
raise SystemExit(
"error: --harvest is required unless --remote-host is set"
)
harvest(args.harvest, dangerous=bool(args.dangerous))
harvest(
args.harvest,
dangerous=bool(args.dangerous),
include_paths=list(getattr(args, "include_path", []) or []),
exclude_paths=list(getattr(args, "exclude_path", []) or []),
)
manifest(
args.harvest,
args.out,

View file

@ -196,6 +196,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str,
for mf in ul.get("managed_files", []) or []:
yield str(ul_role), mf
# extra_paths
xp = state.get("extra_paths") or {}
xp_role = xp.get("role_name") or "extra_paths"
for mf in xp.get("managed_files", []) or []:
yield str(xp_role), mf
def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]:
"""Return mapping of absolute path -> FileRec.

View file

@ -19,6 +19,7 @@ from .debian import (
stat_triplet,
)
from .ignore import IgnorePolicy
from .pathfilter import PathFilter, expand_includes
from .accounts import collect_non_system_users
@ -86,6 +87,16 @@ class UsrLocalCustomSnapshot:
notes: List[str]
@dataclass
class ExtraPathsSnapshot:
role_name: str
include_patterns: List[str]
exclude_patterns: List[str]
managed_files: List[ManagedFile]
excluded: List[ExcludedFile]
notes: List[str]
ALLOWED_UNOWNED_EXTS = {
".conf",
".cfg",
@ -250,6 +261,8 @@ def harvest(
policy: Optional[IgnorePolicy] = None,
*,
dangerous: bool = False,
include_paths: Optional[List[str]] = None,
exclude_paths: Optional[List[str]] = None,
) -> str:
# If a policy is not supplied, build one. `--dangerous` relaxes secret
# detection and deny-glob skipping.
@ -261,6 +274,10 @@ def harvest(
policy.dangerous = True
os.makedirs(bundle_dir, exist_ok=True)
# User-provided includes/excludes. Excludes apply to all harvesting;
# includes are harvested into an extra role.
path_filter = PathFilter(include=include_paths or (), exclude=exclude_paths or ())
if hasattr(os, "geteuid") and os.geteuid() != 0:
print(
"Warning: not running as root; harvest may miss files or metadata.",
@ -406,6 +423,9 @@ def harvest(
)
for path, reason in sorted(candidates.items()):
if path_filter.is_excluded(path):
excluded.append(ExcludedFile(path=path, reason="user_excluded"))
continue
deny = policy.deny_reason(path)
if deny:
excluded.append(ExcludedFile(path=path, reason=deny))
@ -522,6 +542,9 @@ def harvest(
candidates.setdefault(r, "custom_specific_path")
for path, reason in sorted(candidates.items()):
if path_filter.is_excluded(path):
excluded.append(ExcludedFile(path=path, reason="user_excluded"))
continue
deny = policy.deny_reason(path)
if deny:
excluded.append(ExcludedFile(path=path, reason=deny))
@ -593,6 +616,9 @@ def harvest(
# Copy only safe SSH public material: authorized_keys + *.pub
for sf in u.ssh_files:
if path_filter.is_excluded(sf):
users_excluded.append(ExcludedFile(path=sf, reason="user_excluded"))
continue
deny = policy.deny_reason(sf)
if deny:
users_excluded.append(ExcludedFile(path=sf, reason=deny))
@ -665,6 +691,10 @@ def harvest(
if not _is_confish(path):
continue
if path_filter.is_excluded(path):
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
continue
deny = policy.deny_reason(path)
if deny:
etc_excluded.append(ExcludedFile(path=path, reason=deny))
@ -754,6 +784,10 @@ def harvest(
ul_excluded.append(ExcludedFile(path=path, reason="unreadable"))
continue
if path_filter.is_excluded(path):
ul_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
continue
deny = policy.deny_reason(path)
if deny:
ul_excluded.append(ExcludedFile(path=path, reason=deny))
@ -806,6 +840,81 @@ def harvest(
notes=ul_notes,
)
# -------------------------
# extra_paths role (user-requested includes)
# -------------------------
extra_notes: List[str] = []
extra_excluded: List[ExcludedFile] = []
extra_managed: List[ManagedFile] = []
extra_role_name = "extra_paths"
include_specs = list(include_paths or [])
exclude_specs = list(exclude_paths or [])
if include_specs:
extra_notes.append("User include patterns:")
extra_notes.extend([f"- {p}" for p in include_specs])
if exclude_specs:
extra_notes.append("User exclude patterns:")
extra_notes.extend([f"- {p}" for p in exclude_specs])
included_files: List[str] = []
if include_specs:
files, inc_notes = expand_includes(
path_filter.iter_include_patterns(),
exclude=path_filter,
max_files=4000,
)
included_files = files
extra_notes.extend(inc_notes)
for path in included_files:
if path in already_all:
continue
if path_filter.is_excluded(path):
extra_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
continue
deny = policy.deny_reason(path)
if deny:
extra_excluded.append(ExcludedFile(path=path, reason=deny))
continue
try:
owner, group, mode = stat_triplet(path)
except OSError:
extra_excluded.append(ExcludedFile(path=path, reason="unreadable"))
continue
src_rel = path.lstrip("/")
try:
_copy_into_bundle(bundle_dir, extra_role_name, path, src_rel)
except OSError:
extra_excluded.append(ExcludedFile(path=path, reason="unreadable"))
continue
extra_managed.append(
ManagedFile(
path=path,
src_rel=src_rel,
owner=owner,
group=group,
mode=mode,
reason="user_include",
)
)
already_all.add(path)
extra_paths_snapshot = ExtraPathsSnapshot(
role_name=extra_role_name,
include_patterns=include_specs,
exclude_patterns=exclude_specs,
managed_files=extra_managed,
excluded=extra_excluded,
notes=extra_notes,
)
state = {
"host": {"hostname": os.uname().nodename, "os": "debian"},
"users": asdict(users_snapshot),
@ -815,6 +924,7 @@ def harvest(
"package_roles": [asdict(p) for p in pkg_snaps],
"etc_custom": asdict(etc_custom_snapshot),
"usr_local_custom": asdict(usr_local_custom_snapshot),
"extra_paths": asdict(extra_paths_snapshot),
}
state_path = os.path.join(bundle_dir, "state.json")

View file

@ -630,6 +630,7 @@ def _manifest_from_bundle_dir(
users_snapshot: Dict[str, Any] = state.get("users", {})
etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {})
usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {})
extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {})
site_mode = fqdn is not None and fqdn != ""
@ -663,6 +664,7 @@ def _manifest_from_bundle_dir(
manifested_users_roles: List[str] = []
manifested_etc_custom_roles: List[str] = []
manifested_usr_local_custom_roles: List[str] = []
manifested_extra_paths_roles: List[str] = []
manifested_service_roles: List[str] = []
manifested_pkg_roles: List[str] = []
@ -1098,6 +1100,118 @@ Unowned /etc config files not attributed to packages or services.
manifested_usr_local_custom_roles.append(role)
# -------------------------
# extra_paths role (user-requested includes)
# -------------------------
if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"):
role = extra_paths_snapshot.get("role_name", "extra_paths")
role_dir = os.path.join(roles_root, role)
_write_role_scaffold(role_dir)
var_prefix = role
managed_files = extra_paths_snapshot.get("managed_files", [])
excluded = extra_paths_snapshot.get("excluded", [])
notes = extra_paths_snapshot.get("notes", [])
include_pats = extra_paths_snapshot.get("include_patterns", []) or []
exclude_pats = extra_paths_snapshot.get("exclude_patterns", []) or []
templated, jt_vars = _jinjify_managed_files(
bundle_dir,
role,
role_dir,
managed_files,
jt_exe=jt_exe,
jt_enabled=jt_enabled,
overwrite_templates=not site_mode,
)
if site_mode:
_copy_artifacts(
bundle_dir,
role,
_host_role_files_dir(out_dir, fqdn or "", role),
exclude_rels=templated,
)
else:
_copy_artifacts(
bundle_dir,
role,
os.path.join(role_dir, "files"),
exclude_rels=templated,
)
files_var = _build_managed_files_var(
managed_files,
templated,
notify_other=None,
notify_systemd=None,
)
jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {}
vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var}
vars_map = _merge_mappings_overwrite(vars_map, jt_map)
if site_mode:
_write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []})
_write_hostvars(out_dir, fqdn or "", role, vars_map)
else:
_write_role_defaults(role_dir, vars_map)
tasks = "---\n" + _render_generic_files_tasks(
var_prefix, include_restart_notify=False
)
with open(
os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8"
) as f:
f.write(tasks.rstrip() + "\n")
with open(
os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8"
) as f:
f.write("---\n")
with open(
os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8"
) as f:
f.write("---\ndependencies: []\n")
readme = (
f"""# {role}
User-requested extra file harvesting.
## Include patterns
"""
+ ("\n".join([f"- {p}" for p in include_pats]) or "- (none)")
+ """\n
## Exclude patterns
"""
+ ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)")
+ """\n
## Managed files
"""
+ ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)")
+ """\n
## Excluded
"""
+ (
"\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded])
or "- (none)"
)
+ """\n
## Notes
"""
+ ("\n".join([f"- {n}" for n in notes]) or "- (none)")
+ """\n"""
)
with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f:
f.write(readme)
manifested_extra_paths_roles.append(role)
manifested_usr_local_custom_roles.append(role)
# -------------------------
# -------------------------
@ -1412,6 +1526,7 @@ Generated for package `{pkg}`.
+ manifested_service_roles
+ manifested_etc_custom_roles
+ manifested_usr_local_custom_roles
+ manifested_extra_paths_roles
+ manifested_users_roles
)

293
enroll/pathfilter.py Normal file
View file

@ -0,0 +1,293 @@
from __future__ import annotations
import glob
import os
import re
from dataclasses import dataclass
from pathlib import PurePosixPath
from typing import List, Optional, Sequence, Set, Tuple
_REGEX_PREFIXES = ("re:", "regex:")
def _has_glob_chars(s: str) -> bool:
return any(ch in s for ch in "*?[")
def _norm_abs(p: str) -> str:
"""Normalise a path-ish string to an absolute POSIX path.
We treat inputs that don't start with '/' as being relative to '/'.
"""
p = p.strip()
if not p:
return "/"
if not p.startswith("/"):
p = "/" + p
# `normpath` keeps a leading '/' for absolute paths.
return os.path.normpath(p)
def _posix_match(path: str, pattern: str) -> bool:
"""Path matching with glob semantics.
Uses PurePosixPath.match which:
- treats '/' as a segment separator
- supports '**' for recursive matching
Both `path` and `pattern` are treated as absolute paths.
"""
# PurePosixPath.match is anchored and works best on relative strings.
p = path.lstrip("/")
pat = pattern.lstrip("/")
try:
return PurePosixPath(p).match(pat)
except Exception:
# If the pattern is somehow invalid, fail closed.
return False
def _regex_literal_prefix(regex: str) -> str:
"""Best-effort literal prefix extraction for a regex.
This lets us pick a starting directory to walk when expanding regex-based
include patterns.
"""
s = regex
if s.startswith("^"):
s = s[1:]
out: List[str] = []
escaped = False
meta = set(".^$*+?{}[]\\|()")
for ch in s:
if escaped:
out.append(ch)
escaped = False
continue
if ch == "\\":
escaped = True
continue
if ch in meta:
break
out.append(ch)
return "".join(out)
@dataclass(frozen=True)
class CompiledPathPattern:
    """A single user path pattern, pre-compiled for repeated matching.

    `kind` is one of:
      - 'prefix': exact file path, or a directory and everything under it
      - 'glob':   glob pattern (matched via `_posix_match`)
      - 'regex':  regular expression (`regex is None` means it failed to
        compile and never matches)
    """

    raw: str
    kind: str  # 'prefix' | 'glob' | 'regex'
    value: str
    regex: Optional[re.Pattern[str]] = None

    def matches(self, path: str) -> bool:
        """Return True if *path* (normalised to absolute) matches this pattern."""
        candidate = _norm_abs(path)
        if self.kind == "regex":
            # `search` (not `match`) so users can write unanchored patterns;
            # an uncompilable regex (regex=None) matches nothing.
            return bool(self.regex and self.regex.search(candidate))
        if self.kind == "glob":
            return _posix_match(candidate, self.value)
        # 'prefix': the path itself, or anything beneath it.
        base = self.value.rstrip("/")
        return candidate == base or candidate.startswith(base + "/")
def compile_path_pattern(raw: str) -> CompiledPathPattern:
    """Compile one user-supplied pattern string into a `CompiledPathPattern`.

    Dispatch rules (first match wins):
      - 're:' / 'regex:' prefix -> regex pattern; an invalid regex compiles to
        a pattern that matches nothing rather than raising
      - 'glob:' prefix          -> forced glob
      - contains '*', '?' or '[' -> glob (heuristic)
      - otherwise               -> exact path / directory-subtree prefix
    """
    s = raw.strip()
    for pre in _REGEX_PREFIXES:
        if s.startswith(pre):
            rex = s[len(pre) :].strip()
            try:
                return CompiledPathPattern(
                    raw=raw, kind="regex", value=rex, regex=re.compile(rex)
                )
            except re.error:
                # Treat invalid regexes as non-matching.
                return CompiledPathPattern(raw=raw, kind="regex", value=rex, regex=None)
    # If the user explicitly says glob:, honour it.
    if s.startswith("glob:"):
        pat = s[len("glob:") :].strip()
        return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(pat))
    # Heuristic: glob metacharacters imply a glob. ('**' necessarily contains
    # '*', so no separate check for it is needed.)
    if _has_glob_chars(s):
        return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(s))
    # Otherwise treat as an exact path-or-prefix (dir subtree).
    return CompiledPathPattern(raw=raw, kind="prefix", value=_norm_abs(s))
@dataclass
class PathFilter:
    """User-provided path filters.

    Semantics:
    - exclude patterns always win
    - include patterns are used only to expand *additional* files to harvest
      (they do not restrict the default harvest set)

    Patterns:
    - By default: glob-like (supports '**')
    - Regex: prefix with 're:' or 'regex:'
    - Force glob: prefix with 'glob:'
    - A plain path without wildcards matches that path and everything under it
      (directory-prefix behavior).

    Examples:
        --exclude-path /usr/local/bin/docker-*
        --include-path /home/*/.bashrc
        --include-path 're:^/home/[^/]+/.config/myapp/.*$'
    """

    include: Sequence[str] = ()
    exclude: Sequence[str] = ()

    def __post_init__(self) -> None:
        # Compile both pattern lists once, up front.
        self._include = self._compiled(self.include)
        self._exclude = self._compiled(self.exclude)

    @staticmethod
    def _compiled(patterns: Sequence[str]) -> List[CompiledPathPattern]:
        # Blank / whitespace-only entries are dropped.
        return [compile_path_pattern(p) for p in patterns if str(p).strip()]

    def is_excluded(self, path: str) -> bool:
        """Return True if any exclude pattern matches *path*."""
        return any(pat.matches(path) for pat in self._exclude)

    def iter_include_patterns(self) -> List[CompiledPathPattern]:
        """Return a copy of the compiled include patterns."""
        return list(self._include)
def expand_includes(
    patterns: Sequence[CompiledPathPattern],
    *,
    exclude: Optional[PathFilter] = None,
    max_files: int = 4000,
) -> Tuple[List[str], List[str]]:
    """Expand include patterns into concrete file paths.

    Returns ``(paths, notes)``. The returned paths are absolute; notes record
    patterns that matched nothing and whether the file cap was hit.

    This function is intentionally conservative:
    - symlinks are ignored (both dirs and files)
    - the number of collected files is capped at ``max_files``

    Regex patterns are expanded by walking a best-effort inferred root
    (derived from the regex's literal prefix; '/' when none can be inferred).
    """
    out: List[str] = []
    notes: List[str] = []
    # Paths already collected; prevents duplicates across overlapping patterns.
    seen: Set[str] = set()

    def _maybe_add_file(p: str) -> None:
        # Add one regular (non-symlink) file, honouring the cap, user
        # excludes and de-duplication. Silently does nothing otherwise.
        if len(out) >= max_files:
            return
        p = _norm_abs(p)
        if exclude and exclude.is_excluded(p):
            return
        if p in seen:
            return
        if not os.path.isfile(p) or os.path.islink(p):
            return
        seen.add(p)
        out.append(p)

    def _walk_dir(root: str, match: Optional[CompiledPathPattern] = None) -> None:
        # Recursively collect files under `root`. When `match` is given
        # (regex expansion), only files matching that pattern are kept.
        # Returns early (mid-walk) once the cap is reached.
        root = _norm_abs(root)
        if not os.path.isdir(root) or os.path.islink(root):
            return
        for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
            # Prune excluded directories early.
            if exclude:
                dirnames[:] = [
                    d
                    for d in dirnames
                    if not exclude.is_excluded(os.path.join(dirpath, d))
                    and not os.path.islink(os.path.join(dirpath, d))
                ]
            for fn in filenames:
                if len(out) >= max_files:
                    return
                p = os.path.join(dirpath, fn)
                if os.path.islink(p) or not os.path.isfile(p):
                    continue
                if exclude and exclude.is_excluded(p):
                    continue
                if match is not None and not match.matches(p):
                    continue
                if p in seen:
                    continue
                seen.add(p)
                out.append(_norm_abs(p))

    for pat in patterns:
        # NOTE(review): the cap note is only emitted when a *subsequent*
        # pattern starts while the cap is already hit; a cap reached during
        # the final pattern goes unreported — confirm whether that's intended.
        if len(out) >= max_files:
            notes.append(
                f"Include cap reached ({max_files}); some includes were not expanded."
            )
            break
        matched_any = False
        if pat.kind == "prefix":
            p = pat.value
            if os.path.isfile(p) and not os.path.islink(p):
                _maybe_add_file(p)
                matched_any = True
            elif os.path.isdir(p) and not os.path.islink(p):
                before = len(out)
                _walk_dir(p)
                matched_any = len(out) > before
            else:
                # Prefix patterns that don't exist locally (e.g. a path that
                # only exists on a remote host) match nothing rather than
                # erroring.
                matched_any = False
        elif pat.kind == "glob":
            # Use glob for expansion; also walk directories that match.
            gpat = pat.value
            hits = glob.glob(gpat, recursive=True)
            for h in hits:
                if len(out) >= max_files:
                    break
                h = _norm_abs(h)
                if exclude and exclude.is_excluded(h):
                    continue
                if os.path.isdir(h) and not os.path.islink(h):
                    before = len(out)
                    _walk_dir(h)
                    if len(out) > before:
                        matched_any = True
                elif os.path.isfile(h) and not os.path.islink(h):
                    _maybe_add_file(h)
                    matched_any = True
        else:  # regex
            rex = pat.value
            prefix = _regex_literal_prefix(rex)
            # Determine a walk root. If we can infer an absolute prefix, use its
            # directory; otherwise fall back to '/'.
            if prefix.startswith("/"):
                root = os.path.dirname(prefix) or "/"
            else:
                root = "/"
            before = len(out)
            _walk_dir(root, match=pat)
            matched_any = len(out) > before
        if not matched_any:
            notes.append(f"Include pattern matched no files: {pat.raw!r}")
    return out, notes

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import os
import shlex
import shutil
import tarfile
import tempfile
@ -97,6 +98,8 @@ def remote_harvest(
remote_python: str = "python3",
dangerous: bool = False,
no_sudo: bool = False,
include_paths: Optional[list[str]] = None,
exclude_paths: Optional[list[str]] = None,
) -> Path:
"""Run enroll harvest on a remote host via SSH and pull the bundle locally.
@ -165,13 +168,25 @@ def remote_harvest(
sftp.put(str(pyz), rapp)
# Run remote harvest.
_cmd = f"{remote_python} {rapp} harvest --out {rbundle}"
argv: list[str] = [
remote_python,
rapp,
"harvest",
"--out",
rbundle,
]
if dangerous:
argv.append("--dangerous")
for p in include_paths or []:
argv.extend(["--include-path", str(p)])
for p in exclude_paths or []:
argv.extend(["--exclude-path", str(p)])
_cmd = " ".join(shlex.quote(a) for a in argv)
if not no_sudo:
cmd = f"sudo {_cmd}"
else:
cmd = _cmd
if dangerous:
cmd += " --dangerous"
rc, out, err = _ssh_run(ssh, cmd)
if rc != 0:
raise RuntimeError(

View file

@ -6,9 +6,17 @@ import enroll.cli as cli
def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path):
called = {}
def fake_harvest(out: str, dangerous: bool = False):
def fake_harvest(
out: str,
dangerous: bool = False,
include_paths=None,
exclude_paths=None,
**_kwargs,
):
called["out"] = out
called["dangerous"] = dangerous
called["include_paths"] = include_paths or []
called["exclude_paths"] = exclude_paths or []
return str(tmp_path / "state.json")
monkeypatch.setattr(cli, "harvest", fake_harvest)
@ -17,6 +25,8 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path):
cli.main()
assert called["out"] == str(tmp_path)
assert called["dangerous"] is False
assert called["include_paths"] == []
assert called["exclude_paths"] == []
captured = capsys.readouterr()
assert str(tmp_path / "state.json") in captured.out
@ -55,8 +65,16 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path):
def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path):
calls = []
def fake_harvest(bundle_dir: str, dangerous: bool = False):
calls.append(("harvest", bundle_dir, dangerous))
def fake_harvest(
bundle_dir: str,
dangerous: bool = False,
include_paths=None,
exclude_paths=None,
**_kwargs,
):
calls.append(
("harvest", bundle_dir, dangerous, include_paths or [], exclude_paths or [])
)
return str(tmp_path / "bundle" / "state.json")
def fake_manifest(bundle_dir: str, out_dir: str, **kwargs):
@ -87,7 +105,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path)
cli.main()
assert calls == [
("harvest", str(tmp_path / "bundle"), False),
("harvest", str(tmp_path / "bundle"), False, [], []),
("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"),
]
@ -95,9 +113,17 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path)
def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path):
called = {}
def fake_harvest(out: str, dangerous: bool = False):
def fake_harvest(
out: str,
dangerous: bool = False,
include_paths=None,
exclude_paths=None,
**_kwargs,
):
called["out"] = out
called["dangerous"] = dangerous
called["include_paths"] = include_paths or []
called["exclude_paths"] = exclude_paths or []
return str(tmp_path / "state.json")
monkeypatch.setattr(cli, "harvest", fake_harvest)
@ -107,6 +133,8 @@ def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path):
cli.main()
assert called["dangerous"] is True
assert called["include_paths"] == []
assert called["exclude_paths"] == []
def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
@ -131,6 +159,9 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
remote_user,
dangerous,
no_sudo,
include_paths=None,
exclude_paths=None,
**_kwargs,
):
called.update(
{
@ -140,6 +171,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
"remote_user": remote_user,
"dangerous": dangerous,
"no_sudo": no_sudo,
"include_paths": include_paths or [],
"exclude_paths": exclude_paths or [],
}
)
return cache_dir / "state.json"
@ -169,6 +202,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
assert called["remote_user"] == "alice"
assert called["dangerous"] is False
assert called["no_sudo"] is False
assert called["include_paths"] == []
assert called["exclude_paths"] == []
def test_cli_single_shot_remote_without_harvest_prints_state_path(