Allow the user to add extra paths to harvest, or
paths to ignore, using `--exclude-path` and `--include-path` arguments.
This commit is contained in:
parent
25add369dc
commit
240e79706f
9 changed files with 687 additions and 12 deletions
|
|
@ -1,3 +1,8 @@
|
|||
# 0.1.3
|
||||
|
||||
* Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path`
|
||||
arguments.
|
||||
|
||||
# 0.1.2
|
||||
|
||||
* Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or
|
||||
|
|
|
|||
26
README.md
26
README.md
|
|
@ -69,6 +69,7 @@ Harvest state about a host and write a harvest bundle.
|
|||
- Changed-from-default config (plus related custom/unowned files under service dirs)
|
||||
- Non-system users + SSH public keys
|
||||
- Misc `/etc` that can’t be attributed to a package (`etc_custom` role)
|
||||
- Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time)
|
||||
|
||||
**Common flags**
|
||||
- Remote harvesting:
|
||||
|
|
@ -79,6 +80,14 @@ Harvest state about a host and write a harvest bundle.
|
|||
- `--dangerous`: disables secret-safety checks (see “Sensitive data” below)
|
||||
- Encrypt bundles at rest:
|
||||
- `--sops <FINGERPRINT...>`: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory
|
||||
- Path selection (include/exclude):
|
||||
- `--include-path <PATTERN>` (repeatable): add extra files/dirs to harvest (even from locations normally ignored, like `/home`). Still subject to secret-safety checks unless `--dangerous`.
|
||||
- `--exclude-path <PATTERN>` (repeatable): skip files/dirs even if they would normally be harvested.
|
||||
- Pattern syntax:
|
||||
- plain path: matches that file; directories match the directory + everything under it
|
||||
- glob (default): supports `*` and `**` (prefix with `glob:` to force)
|
||||
- regex: prefix with `re:` or `regex:`
|
||||
- Precedence: excludes win over includes.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -227,6 +236,23 @@ enroll harvest --out /tmp/enroll-harvest
|
|||
enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest
|
||||
```
|
||||
|
||||
### Include paths (`--include-path`)
|
||||
```bash
|
||||
# Add a few dotfiles from /home (still secret-safe unless --dangerous)
|
||||
enroll harvest --out /tmp/enroll-harvest --include-path '/home/*/.bashrc' --include-path '/home/*/.profile'
|
||||
```
|
||||
|
||||
### Exclude paths (`--exclude-path`)
|
||||
```bash
|
||||
# Skip specific /usr/local/bin entries (or patterns)
|
||||
enroll harvest --out /tmp/enroll-harvest --exclude-path '/usr/local/bin/docker-*' --exclude-path '/usr/local/bin/some-tool'
|
||||
```
|
||||
|
||||
### Regex include
|
||||
```bash
|
||||
enroll harvest --out /tmp/enroll-harvest --include-path 're:^/home/[^/]+/\.config/myapp/.*$'
|
||||
```
|
||||
|
||||
### `--dangerous`
|
||||
```bash
|
||||
enroll harvest --out /tmp/enroll-harvest --dangerous
|
||||
|
|
|
|||
|
|
@ -125,6 +125,27 @@ def main() -> None:
|
|||
action="store_true",
|
||||
help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.",
|
||||
)
|
||||
h.add_argument(
|
||||
"--include-path",
|
||||
action="append",
|
||||
default=[],
|
||||
metavar="PATTERN",
|
||||
help=(
|
||||
"Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
|
||||
"Included files are still filtered by IgnorePolicy unless --dangerous is used."
|
||||
),
|
||||
)
|
||||
h.add_argument(
|
||||
"--exclude-path",
|
||||
action="append",
|
||||
default=[],
|
||||
metavar="PATTERN",
|
||||
help=(
|
||||
"Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
|
||||
"Excludes apply to all harvesting, including defaults."
|
||||
),
|
||||
)
|
||||
|
||||
h.add_argument(
|
||||
"--sops",
|
||||
nargs="+",
|
||||
|
|
@ -186,6 +207,27 @@ def main() -> None:
|
|||
action="store_true",
|
||||
help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.",
|
||||
)
|
||||
s.add_argument(
|
||||
"--include-path",
|
||||
action="append",
|
||||
default=[],
|
||||
metavar="PATTERN",
|
||||
help=(
|
||||
"Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
|
||||
"Included files are still filtered by IgnorePolicy unless --dangerous is used."
|
||||
),
|
||||
)
|
||||
s.add_argument(
|
||||
"--exclude-path",
|
||||
action="append",
|
||||
default=[],
|
||||
metavar="PATTERN",
|
||||
help=(
|
||||
"Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:<regex>'. "
|
||||
"Excludes apply to all harvesting, including defaults."
|
||||
),
|
||||
)
|
||||
|
||||
s.add_argument(
|
||||
"--sops",
|
||||
nargs="+",
|
||||
|
|
@ -320,6 +362,8 @@ def main() -> None:
|
|||
remote_user=args.remote_user,
|
||||
dangerous=bool(args.dangerous),
|
||||
no_sudo=bool(args.no_sudo),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
_encrypt_harvest_dir_to_sops(
|
||||
tmp_bundle, out_file, list(sops_fps)
|
||||
|
|
@ -338,6 +382,8 @@ def main() -> None:
|
|||
remote_user=args.remote_user,
|
||||
dangerous=bool(args.dangerous),
|
||||
no_sudo=bool(args.no_sudo),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
print(str(state))
|
||||
else:
|
||||
|
|
@ -350,7 +396,12 @@ def main() -> None:
|
|||
os.chmod(tmp_bundle, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
harvest(str(tmp_bundle), dangerous=bool(args.dangerous))
|
||||
harvest(
|
||||
str(tmp_bundle),
|
||||
dangerous=bool(args.dangerous),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
_encrypt_harvest_dir_to_sops(
|
||||
tmp_bundle, out_file, list(sops_fps)
|
||||
)
|
||||
|
|
@ -360,7 +411,12 @@ def main() -> None:
|
|||
raise SystemExit(
|
||||
"error: --out is required unless --remote-host is set"
|
||||
)
|
||||
path = harvest(args.out, dangerous=bool(args.dangerous))
|
||||
path = harvest(
|
||||
args.out,
|
||||
dangerous=bool(args.dangerous),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
print(path)
|
||||
elif args.cmd == "manifest":
|
||||
out_enc = manifest(
|
||||
|
|
@ -446,6 +502,8 @@ def main() -> None:
|
|||
remote_user=args.remote_user,
|
||||
dangerous=bool(args.dangerous),
|
||||
no_sudo=bool(args.no_sudo),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
_encrypt_harvest_dir_to_sops(
|
||||
tmp_bundle, out_file, list(sops_fps)
|
||||
|
|
@ -473,6 +531,8 @@ def main() -> None:
|
|||
remote_user=args.remote_user,
|
||||
dangerous=bool(args.dangerous),
|
||||
no_sudo=bool(args.no_sudo),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
manifest(
|
||||
str(harvest_dir),
|
||||
|
|
@ -493,7 +553,12 @@ def main() -> None:
|
|||
os.chmod(tmp_bundle, 0o700)
|
||||
except OSError:
|
||||
pass
|
||||
harvest(str(tmp_bundle), dangerous=bool(args.dangerous))
|
||||
harvest(
|
||||
str(tmp_bundle),
|
||||
dangerous=bool(args.dangerous),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
_encrypt_harvest_dir_to_sops(
|
||||
tmp_bundle, out_file, list(sops_fps)
|
||||
)
|
||||
|
|
@ -512,7 +577,12 @@ def main() -> None:
|
|||
raise SystemExit(
|
||||
"error: --harvest is required unless --remote-host is set"
|
||||
)
|
||||
harvest(args.harvest, dangerous=bool(args.dangerous))
|
||||
harvest(
|
||||
args.harvest,
|
||||
dangerous=bool(args.dangerous),
|
||||
include_paths=list(getattr(args, "include_path", []) or []),
|
||||
exclude_paths=list(getattr(args, "exclude_path", []) or []),
|
||||
)
|
||||
manifest(
|
||||
args.harvest,
|
||||
args.out,
|
||||
|
|
|
|||
|
|
@ -196,6 +196,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str,
|
|||
for mf in ul.get("managed_files", []) or []:
|
||||
yield str(ul_role), mf
|
||||
|
||||
# extra_paths
|
||||
xp = state.get("extra_paths") or {}
|
||||
xp_role = xp.get("role_name") or "extra_paths"
|
||||
for mf in xp.get("managed_files", []) or []:
|
||||
yield str(xp_role), mf
|
||||
|
||||
|
||||
def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]:
|
||||
"""Return mapping of absolute path -> FileRec.
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from .debian import (
|
|||
stat_triplet,
|
||||
)
|
||||
from .ignore import IgnorePolicy
|
||||
from .pathfilter import PathFilter, expand_includes
|
||||
from .accounts import collect_non_system_users
|
||||
|
||||
|
||||
|
|
@ -86,6 +87,16 @@ class UsrLocalCustomSnapshot:
|
|||
notes: List[str]
|
||||
|
||||
|
||||
@dataclass
class ExtraPathsSnapshot:
    """Harvest-time snapshot of user-requested extra paths (`--include-path`).

    Stored in the harvest state under the ``extra_paths`` key and turned into
    an ``extra_paths`` role at manifest time.
    """

    # Role name this snapshot is manifested under (default "extra_paths").
    role_name: str
    # Raw user-supplied --include-path patterns, verbatim.
    include_patterns: List[str]
    # Raw user-supplied --exclude-path patterns, verbatim.
    exclude_patterns: List[str]
    # Files that were copied into the bundle for this role.
    managed_files: List[ManagedFile]
    # Files that matched an include but were skipped, with the reason
    # (e.g. "user_excluded", "unreadable", or a policy deny reason).
    excluded: List[ExcludedFile]
    # Free-form human-readable notes recorded during harvest.
    notes: List[str]
|
||||
|
||||
|
||||
ALLOWED_UNOWNED_EXTS = {
|
||||
".conf",
|
||||
".cfg",
|
||||
|
|
@ -250,6 +261,8 @@ def harvest(
|
|||
policy: Optional[IgnorePolicy] = None,
|
||||
*,
|
||||
dangerous: bool = False,
|
||||
include_paths: Optional[List[str]] = None,
|
||||
exclude_paths: Optional[List[str]] = None,
|
||||
) -> str:
|
||||
# If a policy is not supplied, build one. `--dangerous` relaxes secret
|
||||
# detection and deny-glob skipping.
|
||||
|
|
@ -261,6 +274,10 @@ def harvest(
|
|||
policy.dangerous = True
|
||||
os.makedirs(bundle_dir, exist_ok=True)
|
||||
|
||||
# User-provided includes/excludes. Excludes apply to all harvesting;
|
||||
# includes are harvested into an extra role.
|
||||
path_filter = PathFilter(include=include_paths or (), exclude=exclude_paths or ())
|
||||
|
||||
if hasattr(os, "geteuid") and os.geteuid() != 0:
|
||||
print(
|
||||
"Warning: not running as root; harvest may miss files or metadata.",
|
||||
|
|
@ -406,6 +423,9 @@ def harvest(
|
|||
)
|
||||
|
||||
for path, reason in sorted(candidates.items()):
|
||||
if path_filter.is_excluded(path):
|
||||
excluded.append(ExcludedFile(path=path, reason="user_excluded"))
|
||||
continue
|
||||
deny = policy.deny_reason(path)
|
||||
if deny:
|
||||
excluded.append(ExcludedFile(path=path, reason=deny))
|
||||
|
|
@ -522,6 +542,9 @@ def harvest(
|
|||
candidates.setdefault(r, "custom_specific_path")
|
||||
|
||||
for path, reason in sorted(candidates.items()):
|
||||
if path_filter.is_excluded(path):
|
||||
excluded.append(ExcludedFile(path=path, reason="user_excluded"))
|
||||
continue
|
||||
deny = policy.deny_reason(path)
|
||||
if deny:
|
||||
excluded.append(ExcludedFile(path=path, reason=deny))
|
||||
|
|
@ -593,6 +616,9 @@ def harvest(
|
|||
|
||||
# Copy only safe SSH public material: authorized_keys + *.pub
|
||||
for sf in u.ssh_files:
|
||||
if path_filter.is_excluded(sf):
|
||||
users_excluded.append(ExcludedFile(path=sf, reason="user_excluded"))
|
||||
continue
|
||||
deny = policy.deny_reason(sf)
|
||||
if deny:
|
||||
users_excluded.append(ExcludedFile(path=sf, reason=deny))
|
||||
|
|
@ -665,6 +691,10 @@ def harvest(
|
|||
if not _is_confish(path):
|
||||
continue
|
||||
|
||||
if path_filter.is_excluded(path):
|
||||
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
|
||||
continue
|
||||
|
||||
deny = policy.deny_reason(path)
|
||||
if deny:
|
||||
etc_excluded.append(ExcludedFile(path=path, reason=deny))
|
||||
|
|
@ -754,6 +784,10 @@ def harvest(
|
|||
ul_excluded.append(ExcludedFile(path=path, reason="unreadable"))
|
||||
continue
|
||||
|
||||
if path_filter.is_excluded(path):
|
||||
ul_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
|
||||
continue
|
||||
|
||||
deny = policy.deny_reason(path)
|
||||
if deny:
|
||||
ul_excluded.append(ExcludedFile(path=path, reason=deny))
|
||||
|
|
@ -806,6 +840,81 @@ def harvest(
|
|||
notes=ul_notes,
|
||||
)
|
||||
|
||||
# -------------------------
|
||||
# extra_paths role (user-requested includes)
|
||||
# -------------------------
|
||||
extra_notes: List[str] = []
|
||||
extra_excluded: List[ExcludedFile] = []
|
||||
extra_managed: List[ManagedFile] = []
|
||||
extra_role_name = "extra_paths"
|
||||
|
||||
include_specs = list(include_paths or [])
|
||||
exclude_specs = list(exclude_paths or [])
|
||||
|
||||
if include_specs:
|
||||
extra_notes.append("User include patterns:")
|
||||
extra_notes.extend([f"- {p}" for p in include_specs])
|
||||
if exclude_specs:
|
||||
extra_notes.append("User exclude patterns:")
|
||||
extra_notes.extend([f"- {p}" for p in exclude_specs])
|
||||
|
||||
included_files: List[str] = []
|
||||
if include_specs:
|
||||
files, inc_notes = expand_includes(
|
||||
path_filter.iter_include_patterns(),
|
||||
exclude=path_filter,
|
||||
max_files=4000,
|
||||
)
|
||||
included_files = files
|
||||
extra_notes.extend(inc_notes)
|
||||
|
||||
for path in included_files:
|
||||
if path in already_all:
|
||||
continue
|
||||
|
||||
if path_filter.is_excluded(path):
|
||||
extra_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
|
||||
continue
|
||||
|
||||
deny = policy.deny_reason(path)
|
||||
if deny:
|
||||
extra_excluded.append(ExcludedFile(path=path, reason=deny))
|
||||
continue
|
||||
|
||||
try:
|
||||
owner, group, mode = stat_triplet(path)
|
||||
except OSError:
|
||||
extra_excluded.append(ExcludedFile(path=path, reason="unreadable"))
|
||||
continue
|
||||
|
||||
src_rel = path.lstrip("/")
|
||||
try:
|
||||
_copy_into_bundle(bundle_dir, extra_role_name, path, src_rel)
|
||||
except OSError:
|
||||
extra_excluded.append(ExcludedFile(path=path, reason="unreadable"))
|
||||
continue
|
||||
|
||||
extra_managed.append(
|
||||
ManagedFile(
|
||||
path=path,
|
||||
src_rel=src_rel,
|
||||
owner=owner,
|
||||
group=group,
|
||||
mode=mode,
|
||||
reason="user_include",
|
||||
)
|
||||
)
|
||||
already_all.add(path)
|
||||
|
||||
extra_paths_snapshot = ExtraPathsSnapshot(
|
||||
role_name=extra_role_name,
|
||||
include_patterns=include_specs,
|
||||
exclude_patterns=exclude_specs,
|
||||
managed_files=extra_managed,
|
||||
excluded=extra_excluded,
|
||||
notes=extra_notes,
|
||||
)
|
||||
|
||||
state = {
|
||||
"host": {"hostname": os.uname().nodename, "os": "debian"},
|
||||
"users": asdict(users_snapshot),
|
||||
|
|
@ -815,6 +924,7 @@ def harvest(
|
|||
"package_roles": [asdict(p) for p in pkg_snaps],
|
||||
"etc_custom": asdict(etc_custom_snapshot),
|
||||
"usr_local_custom": asdict(usr_local_custom_snapshot),
|
||||
"extra_paths": asdict(extra_paths_snapshot),
|
||||
}
|
||||
|
||||
state_path = os.path.join(bundle_dir, "state.json")
|
||||
|
|
|
|||
|
|
@ -630,6 +630,7 @@ def _manifest_from_bundle_dir(
|
|||
users_snapshot: Dict[str, Any] = state.get("users", {})
|
||||
etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {})
|
||||
usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {})
|
||||
extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {})
|
||||
|
||||
site_mode = fqdn is not None and fqdn != ""
|
||||
|
||||
|
|
@ -663,6 +664,7 @@ def _manifest_from_bundle_dir(
|
|||
manifested_users_roles: List[str] = []
|
||||
manifested_etc_custom_roles: List[str] = []
|
||||
manifested_usr_local_custom_roles: List[str] = []
|
||||
manifested_extra_paths_roles: List[str] = []
|
||||
manifested_service_roles: List[str] = []
|
||||
manifested_pkg_roles: List[str] = []
|
||||
|
||||
|
|
@ -1098,6 +1100,118 @@ Unowned /etc config files not attributed to packages or services.
|
|||
|
||||
manifested_usr_local_custom_roles.append(role)
|
||||
|
||||
# -------------------------
|
||||
# extra_paths role (user-requested includes)
|
||||
# -------------------------
|
||||
if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"):
|
||||
role = extra_paths_snapshot.get("role_name", "extra_paths")
|
||||
role_dir = os.path.join(roles_root, role)
|
||||
_write_role_scaffold(role_dir)
|
||||
|
||||
var_prefix = role
|
||||
|
||||
managed_files = extra_paths_snapshot.get("managed_files", [])
|
||||
excluded = extra_paths_snapshot.get("excluded", [])
|
||||
notes = extra_paths_snapshot.get("notes", [])
|
||||
include_pats = extra_paths_snapshot.get("include_patterns", []) or []
|
||||
exclude_pats = extra_paths_snapshot.get("exclude_patterns", []) or []
|
||||
|
||||
templated, jt_vars = _jinjify_managed_files(
|
||||
bundle_dir,
|
||||
role,
|
||||
role_dir,
|
||||
managed_files,
|
||||
jt_exe=jt_exe,
|
||||
jt_enabled=jt_enabled,
|
||||
overwrite_templates=not site_mode,
|
||||
)
|
||||
|
||||
if site_mode:
|
||||
_copy_artifacts(
|
||||
bundle_dir,
|
||||
role,
|
||||
_host_role_files_dir(out_dir, fqdn or "", role),
|
||||
exclude_rels=templated,
|
||||
)
|
||||
else:
|
||||
_copy_artifacts(
|
||||
bundle_dir,
|
||||
role,
|
||||
os.path.join(role_dir, "files"),
|
||||
exclude_rels=templated,
|
||||
)
|
||||
|
||||
files_var = _build_managed_files_var(
|
||||
managed_files,
|
||||
templated,
|
||||
notify_other=None,
|
||||
notify_systemd=None,
|
||||
)
|
||||
|
||||
jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {}
|
||||
vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var}
|
||||
vars_map = _merge_mappings_overwrite(vars_map, jt_map)
|
||||
|
||||
if site_mode:
|
||||
_write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []})
|
||||
_write_hostvars(out_dir, fqdn or "", role, vars_map)
|
||||
else:
|
||||
_write_role_defaults(role_dir, vars_map)
|
||||
|
||||
tasks = "---\n" + _render_generic_files_tasks(
|
||||
var_prefix, include_restart_notify=False
|
||||
)
|
||||
with open(
|
||||
os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8"
|
||||
) as f:
|
||||
f.write(tasks.rstrip() + "\n")
|
||||
|
||||
with open(
|
||||
os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("---\n")
|
||||
|
||||
with open(
|
||||
os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8"
|
||||
) as f:
|
||||
f.write("---\ndependencies: []\n")
|
||||
|
||||
readme = (
|
||||
f"""# {role}
|
||||
|
||||
User-requested extra file harvesting.
|
||||
|
||||
## Include patterns
|
||||
"""
|
||||
+ ("\n".join([f"- {p}" for p in include_pats]) or "- (none)")
|
||||
+ """\n
|
||||
## Exclude patterns
|
||||
"""
|
||||
+ ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)")
|
||||
+ """\n
|
||||
## Managed files
|
||||
"""
|
||||
+ ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)")
|
||||
+ """\n
|
||||
## Excluded
|
||||
"""
|
||||
+ (
|
||||
"\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded])
|
||||
or "- (none)"
|
||||
)
|
||||
+ """\n
|
||||
## Notes
|
||||
"""
|
||||
+ ("\n".join([f"- {n}" for n in notes]) or "- (none)")
|
||||
+ """\n"""
|
||||
)
|
||||
with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f:
|
||||
f.write(readme)
|
||||
|
||||
manifested_extra_paths_roles.append(role)
|
||||
|
||||
manifested_usr_local_custom_roles.append(role)
|
||||
|
||||
# -------------------------
|
||||
|
||||
# -------------------------
|
||||
|
|
@ -1412,6 +1526,7 @@ Generated for package `{pkg}`.
|
|||
+ manifested_service_roles
|
||||
+ manifested_etc_custom_roles
|
||||
+ manifested_usr_local_custom_roles
|
||||
+ manifested_extra_paths_roles
|
||||
+ manifested_users_roles
|
||||
)
|
||||
|
||||
|
|
|
|||
293
enroll/pathfilter.py
Normal file
293
enroll/pathfilter.py
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
from __future__ import annotations

import glob
import os
import posixpath
import re
from dataclasses import dataclass
from pathlib import PurePosixPath
from typing import List, Optional, Sequence, Set, Tuple
|
||||
|
||||
|
||||
_REGEX_PREFIXES = ("re:", "regex:")
|
||||
|
||||
|
||||
def _has_glob_chars(s: str) -> bool:
|
||||
return any(ch in s for ch in "*?[")
|
||||
|
||||
|
||||
def _norm_abs(p: str) -> str:
|
||||
"""Normalise a path-ish string to an absolute POSIX path.
|
||||
|
||||
We treat inputs that don't start with '/' as being relative to '/'.
|
||||
"""
|
||||
|
||||
p = p.strip()
|
||||
if not p:
|
||||
return "/"
|
||||
if not p.startswith("/"):
|
||||
p = "/" + p
|
||||
# `normpath` keeps a leading '/' for absolute paths.
|
||||
return os.path.normpath(p)
|
||||
|
||||
|
||||
def _posix_match(path: str, pattern: str) -> bool:
|
||||
"""Path matching with glob semantics.
|
||||
|
||||
Uses PurePosixPath.match which:
|
||||
- treats '/' as a segment separator
|
||||
- supports '**' for recursive matching
|
||||
|
||||
Both `path` and `pattern` are treated as absolute paths.
|
||||
"""
|
||||
|
||||
# PurePosixPath.match is anchored and works best on relative strings.
|
||||
p = path.lstrip("/")
|
||||
pat = pattern.lstrip("/")
|
||||
try:
|
||||
return PurePosixPath(p).match(pat)
|
||||
except Exception:
|
||||
# If the pattern is somehow invalid, fail closed.
|
||||
return False
|
||||
|
||||
|
||||
def _regex_literal_prefix(regex: str) -> str:
|
||||
"""Best-effort literal prefix extraction for a regex.
|
||||
|
||||
This lets us pick a starting directory to walk when expanding regex-based
|
||||
include patterns.
|
||||
"""
|
||||
|
||||
s = regex
|
||||
if s.startswith("^"):
|
||||
s = s[1:]
|
||||
out: List[str] = []
|
||||
escaped = False
|
||||
meta = set(".^$*+?{}[]\\|()")
|
||||
for ch in s:
|
||||
if escaped:
|
||||
out.append(ch)
|
||||
escaped = False
|
||||
continue
|
||||
if ch == "\\":
|
||||
escaped = True
|
||||
continue
|
||||
if ch in meta:
|
||||
break
|
||||
out.append(ch)
|
||||
return "".join(out)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CompiledPathPattern:
    """A single parsed include/exclude pattern.

    ``kind`` selects the matching strategy:
    - ``"regex"``: unanchored search with ``regex`` (never matches if the
      regex failed to compile)
    - ``"glob"``: glob-style segment matching via ``_posix_match``
    - ``"prefix"``: exact path, or the path plus everything beneath it
    """

    raw: str
    kind: str  # 'prefix' | 'glob' | 'regex'
    value: str
    regex: Optional[re.Pattern[str]] = None

    def matches(self, path: str) -> bool:
        candidate = _norm_abs(path)

        if self.kind == "regex":
            # Search (not match) so users can write unanchored patterns.
            # A None regex means compilation failed: match nothing.
            return bool(self.regex and self.regex.search(candidate))

        if self.kind == "glob":
            return _posix_match(candidate, self.value)

        # 'prefix': the path itself, or anything in its subtree.
        base = self.value.rstrip("/")
        if candidate == base:
            return True
        return candidate.startswith(base + "/")
|
||||
|
||||
|
||||
def compile_path_pattern(raw: str) -> CompiledPathPattern:
    """Parse one user pattern string into a :class:`CompiledPathPattern`.

    Recognised forms, in priority order:
    - ``re:<regex>`` / ``regex:<regex>`` — regular expression (an invalid
      regex compiles to a pattern that matches nothing)
    - ``glob:<pattern>`` — forced glob
    - anything containing glob metacharacters — glob
    - anything else — exact path-or-prefix (directory subtree)
    """

    spec = raw.strip()

    for marker in _REGEX_PREFIXES:
        if not spec.startswith(marker):
            continue
        body = spec[len(marker):].strip()
        try:
            compiled: Optional[re.Pattern[str]] = re.compile(body)
        except re.error:
            # Treat invalid regexes as non-matching rather than raising.
            compiled = None
        return CompiledPathPattern(raw=raw, kind="regex", value=body, regex=compiled)

    # An explicit glob: prefix is always honoured.
    if spec.startswith("glob:"):
        body = spec[len("glob:"):].strip()
        return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(body))

    # Heuristic: glob metacharacters imply a glob pattern.
    if _has_glob_chars(spec) or "**" in spec:
        return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(spec))

    # Plain path: exact file, or directory plus its subtree.
    return CompiledPathPattern(raw=raw, kind="prefix", value=_norm_abs(spec))
|
||||
|
||||
|
||||
@dataclass
class PathFilter:
    """User-provided path filters.

    Semantics:
    - exclude patterns always win
    - include patterns only expand *additional* files to harvest; they never
      restrict the default harvest set

    Patterns:
    - default: glob-like (supports '**')
    - regex: prefix with 're:' or 'regex:'
    - forced glob: prefix with 'glob:'
    - a plain path without wildcards matches that path and everything under
      it (directory-prefix behavior)

    Examples:
      --exclude-path /usr/local/bin/docker-*
      --include-path /home/*/.bashrc
      --include-path 're:^/home/[^/]+/\\.config/myapp/.*$'
    """

    include: Sequence[str] = ()
    exclude: Sequence[str] = ()

    def __post_init__(self) -> None:
        # Compile once up front; blank/whitespace-only entries are dropped.
        self._include = [
            compile_path_pattern(spec) for spec in self.include if str(spec).strip()
        ]
        self._exclude = [
            compile_path_pattern(spec) for spec in self.exclude if str(spec).strip()
        ]

    def is_excluded(self, path: str) -> bool:
        """Return True if any exclude pattern matches *path*."""
        return any(pat.matches(path) for pat in self._exclude)

    def iter_include_patterns(self) -> List[CompiledPathPattern]:
        """Return a copy of the compiled include patterns."""
        return list(self._include)
|
||||
|
||||
|
||||
def expand_includes(
    patterns: Sequence[CompiledPathPattern],
    *,
    exclude: Optional[PathFilter] = None,
    max_files: int = 4000,
) -> Tuple[List[str], List[str]]:
    """Expand include patterns into concrete file paths.

    Returns (paths, notes). The returned paths are absolute paths.

    This function is intentionally conservative:
    - symlinks are ignored (both dirs and files)
    - the number of collected files is capped

    Regex patterns are expanded by walking a best-effort inferred root.
    """

    out: List[str] = []
    notes: List[str] = []
    # Deduplication set shared by both helpers below.
    seen: Set[str] = set()

    def _maybe_add_file(p: str) -> None:
        # Add a single regular file, applying the cap, user excludes,
        # dedup, and the no-symlink rule.
        if len(out) >= max_files:
            return
        p = _norm_abs(p)
        if exclude and exclude.is_excluded(p):
            return
        if p in seen:
            return
        if not os.path.isfile(p) or os.path.islink(p):
            return
        seen.add(p)
        out.append(p)

    def _walk_dir(root: str, match: Optional[CompiledPathPattern] = None) -> None:
        # Recursively collect regular files under `root`. When `match` is
        # given (regex expansion), only files it matches are collected.
        root = _norm_abs(root)
        if not os.path.isdir(root) or os.path.islink(root):
            return
        for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
            # Prune excluded directories early.
            if exclude:
                dirnames[:] = [
                    d
                    for d in dirnames
                    if not exclude.is_excluded(os.path.join(dirpath, d))
                    and not os.path.islink(os.path.join(dirpath, d))
                ]
            for fn in filenames:
                if len(out) >= max_files:
                    return
                p = os.path.join(dirpath, fn)
                if os.path.islink(p) or not os.path.isfile(p):
                    continue
                if exclude and exclude.is_excluded(p):
                    continue
                if match is not None and not match.matches(p):
                    continue
                if p in seen:
                    continue
                # NOTE(review): `p` is recorded in `seen` un-normalised but
                # appended normalised; `root` is already normalised so the
                # two should coincide — confirm.
                seen.add(p)
                out.append(_norm_abs(p))

    for pat in patterns:
        if len(out) >= max_files:
            notes.append(
                f"Include cap reached ({max_files}); some includes were not expanded."
            )
            break

        # Tracked per-pattern so users get a note for dead patterns.
        matched_any = False

        if pat.kind == "prefix":
            # Exact file, or a directory meaning "everything under it".
            p = pat.value
            if os.path.isfile(p) and not os.path.islink(p):
                _maybe_add_file(p)
                matched_any = True
            elif os.path.isdir(p) and not os.path.islink(p):
                before = len(out)
                _walk_dir(p)
                matched_any = len(out) > before
            else:
                # Still allow prefix patterns that don't exist now (e.g. remote different)
                # by matching nothing rather than erroring.
                matched_any = False

        elif pat.kind == "glob":
            # Use glob for expansion; also walk directories that match.
            gpat = pat.value
            hits = glob.glob(gpat, recursive=True)
            for h in hits:
                if len(out) >= max_files:
                    break
                h = _norm_abs(h)
                if exclude and exclude.is_excluded(h):
                    continue
                if os.path.isdir(h) and not os.path.islink(h):
                    # A directory hit pulls in its whole subtree.
                    before = len(out)
                    _walk_dir(h)
                    if len(out) > before:
                        matched_any = True
                elif os.path.isfile(h) and not os.path.islink(h):
                    _maybe_add_file(h)
                    matched_any = True

        else:  # regex
            rex = pat.value
            prefix = _regex_literal_prefix(rex)
            # Determine a walk root. If we can infer an absolute prefix, use its
            # directory; otherwise fall back to '/'.
            if prefix.startswith("/"):
                root = os.path.dirname(prefix) or "/"
            else:
                root = "/"
            before = len(out)
            _walk_dir(root, match=pat)
            matched_any = len(out) > before

        if not matched_any:
            notes.append(f"Include pattern matched no files: {pat.raw!r}")

    return out, notes
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
|
|
@ -97,6 +98,8 @@ def remote_harvest(
|
|||
remote_python: str = "python3",
|
||||
dangerous: bool = False,
|
||||
no_sudo: bool = False,
|
||||
include_paths: Optional[list[str]] = None,
|
||||
exclude_paths: Optional[list[str]] = None,
|
||||
) -> Path:
|
||||
"""Run enroll harvest on a remote host via SSH and pull the bundle locally.
|
||||
|
||||
|
|
@ -165,13 +168,25 @@ def remote_harvest(
|
|||
sftp.put(str(pyz), rapp)
|
||||
|
||||
# Run remote harvest.
|
||||
_cmd = f"{remote_python} {rapp} harvest --out {rbundle}"
|
||||
argv: list[str] = [
|
||||
remote_python,
|
||||
rapp,
|
||||
"harvest",
|
||||
"--out",
|
||||
rbundle,
|
||||
]
|
||||
if dangerous:
|
||||
argv.append("--dangerous")
|
||||
for p in include_paths or []:
|
||||
argv.extend(["--include-path", str(p)])
|
||||
for p in exclude_paths or []:
|
||||
argv.extend(["--exclude-path", str(p)])
|
||||
|
||||
_cmd = " ".join(shlex.quote(a) for a in argv)
|
||||
if not no_sudo:
|
||||
cmd = f"sudo {_cmd}"
|
||||
else:
|
||||
cmd = _cmd
|
||||
if dangerous:
|
||||
cmd += " --dangerous"
|
||||
rc, out, err = _ssh_run(ssh, cmd)
|
||||
if rc != 0:
|
||||
raise RuntimeError(
|
||||
|
|
|
|||
|
|
@ -6,9 +6,17 @@ import enroll.cli as cli
|
|||
def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path):
|
||||
called = {}
|
||||
|
||||
def fake_harvest(out: str, dangerous: bool = False):
|
||||
def fake_harvest(
|
||||
out: str,
|
||||
dangerous: bool = False,
|
||||
include_paths=None,
|
||||
exclude_paths=None,
|
||||
**_kwargs,
|
||||
):
|
||||
called["out"] = out
|
||||
called["dangerous"] = dangerous
|
||||
called["include_paths"] = include_paths or []
|
||||
called["exclude_paths"] = exclude_paths or []
|
||||
return str(tmp_path / "state.json")
|
||||
|
||||
monkeypatch.setattr(cli, "harvest", fake_harvest)
|
||||
|
|
@ -17,6 +25,8 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path):
|
|||
cli.main()
|
||||
assert called["out"] == str(tmp_path)
|
||||
assert called["dangerous"] is False
|
||||
assert called["include_paths"] == []
|
||||
assert called["exclude_paths"] == []
|
||||
captured = capsys.readouterr()
|
||||
assert str(tmp_path / "state.json") in captured.out
|
||||
|
||||
|
|
@ -55,8 +65,16 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path):
|
|||
def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path):
|
||||
calls = []
|
||||
|
||||
def fake_harvest(bundle_dir: str, dangerous: bool = False):
|
||||
calls.append(("harvest", bundle_dir, dangerous))
|
||||
def fake_harvest(
|
||||
bundle_dir: str,
|
||||
dangerous: bool = False,
|
||||
include_paths=None,
|
||||
exclude_paths=None,
|
||||
**_kwargs,
|
||||
):
|
||||
calls.append(
|
||||
("harvest", bundle_dir, dangerous, include_paths or [], exclude_paths or [])
|
||||
)
|
||||
return str(tmp_path / "bundle" / "state.json")
|
||||
|
||||
def fake_manifest(bundle_dir: str, out_dir: str, **kwargs):
|
||||
|
|
@ -87,7 +105,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path)
|
|||
|
||||
cli.main()
|
||||
assert calls == [
|
||||
("harvest", str(tmp_path / "bundle"), False),
|
||||
("harvest", str(tmp_path / "bundle"), False, [], []),
|
||||
("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"),
|
||||
]
|
||||
|
||||
|
|
@ -95,9 +113,17 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path)
|
|||
def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path):
|
||||
called = {}
|
||||
|
||||
def fake_harvest(out: str, dangerous: bool = False):
|
||||
def fake_harvest(
|
||||
out: str,
|
||||
dangerous: bool = False,
|
||||
include_paths=None,
|
||||
exclude_paths=None,
|
||||
**_kwargs,
|
||||
):
|
||||
called["out"] = out
|
||||
called["dangerous"] = dangerous
|
||||
called["include_paths"] = include_paths or []
|
||||
called["exclude_paths"] = exclude_paths or []
|
||||
return str(tmp_path / "state.json")
|
||||
|
||||
monkeypatch.setattr(cli, "harvest", fake_harvest)
|
||||
|
|
@ -107,6 +133,8 @@ def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path):
|
|||
|
||||
cli.main()
|
||||
assert called["dangerous"] is True
|
||||
assert called["include_paths"] == []
|
||||
assert called["exclude_paths"] == []
|
||||
|
||||
|
||||
def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
|
||||
|
|
@ -131,6 +159,9 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
|
|||
remote_user,
|
||||
dangerous,
|
||||
no_sudo,
|
||||
include_paths=None,
|
||||
exclude_paths=None,
|
||||
**_kwargs,
|
||||
):
|
||||
called.update(
|
||||
{
|
||||
|
|
@ -140,6 +171,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
|
|||
"remote_user": remote_user,
|
||||
"dangerous": dangerous,
|
||||
"no_sudo": no_sudo,
|
||||
"include_paths": include_paths or [],
|
||||
"exclude_paths": exclude_paths or [],
|
||||
}
|
||||
)
|
||||
return cache_dir / "state.json"
|
||||
|
|
@ -169,6 +202,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir(
|
|||
assert called["remote_user"] == "alice"
|
||||
assert called["dangerous"] is False
|
||||
assert called["no_sudo"] is False
|
||||
assert called["include_paths"] == []
|
||||
assert called["exclude_paths"] == []
|
||||
|
||||
|
||||
def test_cli_single_shot_remote_without_harvest_prints_state_path(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue