Only capture user-specific .bashrc style files when using `--dangerous` mode, in case they contain sensitive env vars.
All checks were successful
CI / test (push) Successful in 14m0s
Lint / test (push) Successful in 42s

This commit is contained in:
Miguel Jacq 2026-06-16 13:35:33 +10:00
parent 8774d019d3
commit 3c19ae54b2
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
5 changed files with 192 additions and 56 deletions

View file

@ -2,6 +2,7 @@
* Add support for detecting flatpaks and snaps
* BREAKING CHANGE: Group all package and systemd-unit roles into Debian Section/RPM Group roles by default, including managed config files and unit state. This mode is not used if `--fqdn` or `--no-common-roles` is set, in which case, the traditional behaviour of preserving one role per package/unit is used instead.
* BREAKING CHANGE: Only capture user-specific .bashrc style files when using `--dangerous` mode, in case they contain sensitive env vars.
# 0.6.0

View file

@ -11,7 +11,7 @@
- Captures config that has **changed from packaged defaults** where possible (e.g. dpkg conffile hashes + package md5sums when available).
- Also captures **service-relevant custom/unowned files** under `/etc/<service>/...` (e.g. drop-in config includes).
- Defensively excludes likely secrets (path denylist + content sniff + size caps).
- Captures non-system users and their SSH public keys and any .bashrc or .bash_aliases or .profile files that deviate from the skel defaults.
- Captures non-system users and their SSH public keys. In `--dangerous` mode, it also auto-harvests common shell dotfiles such as `.bashrc`, `.profile`, `.bash_logout`, and `.bash_aliases` when appropriate.
- Captures miscellaneous `/etc` files it can't attribute to a package and installs them in an `etc_custom` role.
- Captures live ipset and iptables runtime state into a fallback `firewall_runtime` role, when active ipsets/iptables rules are present *and* no corresponding persistent ipset/iptables *files* were found.
- Captures symlinks in common applications that rely on them, e.g. apache2/nginx 'sites-enabled'
@ -70,6 +70,7 @@ Harvest state about a host and write a harvest bundle.
- “Manual” packages
- Changed-from-default config (plus related custom/unowned files under service dirs)
- Non-system users + SSH public keys
- In `--dangerous` mode: common per-user shell dotfiles that are likely to represent deliberate account customisation
- Misc `/etc` that can't be attributed to a package (`etc_custom` role)
- Static firewall config files such as nftables, UFW, firewalld, `/etc/iptables/rules.v4`, `/etc/iptables/rules.v6`, and `/etc/ipset*`
- Live kernel ipset/iptables state via `ipset save`, `iptables-save`, and `ip6tables-save` as a fallback, but only when the corresponding persistent config was not found (`firewall_runtime` role at manifest time)
@ -270,6 +271,8 @@ enroll validate ./harvest --fail-on-warnings
By default, `enroll` does **not** assume how you handle secrets in Ansible. It will attempt to avoid harvesting likely sensitive data (private keys, passwords, tokens, etc.). This can mean it skips some config files you may ultimately want to manage.
Automatic harvesting of per-user shell dotfiles is also disabled by default, even when those files differ from `/etc/skel`, because `.bashrc`, `.profile`, `.bash_aliases`, and similar files commonly contain exported tokens, credentials, or aliases/functions with embedded secrets. Use `--dangerous` for automatic shell-dotfile capture, or use targeted `--include-path` patterns for narrower safe-mode review.
If you opt in to collecting everything:
### `--dangerous`

View file

@ -498,6 +498,93 @@ def _capture_file(
return True
# Per-user shell dotfiles that are compared against the copy of the same name
# in the skeleton directory before capture. Each entry is
# (filename relative to the user's home, reason passed to _capture_file).
# _capture_user_shell_dotfiles only offers these for capture when the user's
# copy differs from the skeleton copy.
USER_SHELL_DOTFILES_WITH_SKEL_BASELINE = [
    (".bashrc", "user_shell_rc"),
    (".profile", "user_profile"),
    (".bash_logout", "user_shell_logout"),
]
# Per-user shell dotfiles that are not compared against a skeleton copy.
# Same (filename, reason) layout; _capture_user_shell_dotfiles offers these
# for capture whenever a regular, non-symlink file of that name is present.
USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE = [
    (".bash_aliases", "user_shell_aliases"),
]
def _capture_user_shell_dotfiles(
    *,
    bundle_dir: str,
    role_name: str,
    home: str,
    skel_dir: str,
    enabled: bool,
    policy: IgnorePolicy,
    path_filter: PathFilter,
    managed_out: List[ManagedFile],
    excluded_out: List[ExcludedFile],
    seen_role: Optional[Set[str]],
    seen_global: Optional[Set[str]],
) -> int:
    """Capture selected per-user shell dotfiles when explicitly enabled.

    Shell startup files are useful for reproducing interactive accounts, but they
    commonly contain exported tokens, passwords, command aliases with embedded
    credentials, and other private context. For that reason, automatic capture is
    gated by harvest's dangerous mode. Users who want a narrower safe-mode
    selection can still use --include-path, which lands in the extra_paths role
    and remains subject to IgnorePolicy content checks.

    Files listed in USER_SHELL_DOTFILES_WITH_SKEL_BASELINE are only considered
    when they differ from the same-named file under ``skel_dir``; files listed
    in USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE are considered whenever they
    exist. In both cases only regular files are considered (symlinks are
    skipped), and the final decision is delegated to ``_capture_file``.

    Returns the number of files for which ``_capture_file`` returned a truthy
    result. Returns 0 without touching the filesystem when ``enabled`` is false
    or ``home`` is empty or not an absolute path.
    """
    if not enabled:
        return 0

    home = (home or "").rstrip("/")
    # An empty string never starts with "/", so this single check rejects both
    # a missing home and a relative one.
    if not home.startswith("/"):
        return 0

    max_compare_bytes = int(getattr(policy, "max_file_bytes", 256_000))

    # One ordered pass over both tables: skeleton-baselined files first, then
    # the files with no baseline. The flag records whether the skeleton
    # comparison applies, so the guard and capture call are written once.
    candidates = [
        (rel, reason, True)
        for rel, reason in USER_SHELL_DOTFILES_WITH_SKEL_BASELINE
    ] + [
        (rel, reason, False)
        for rel, reason in USER_SHELL_DOTFILES_WITHOUT_SKEL_BASELINE
    ]

    captured = 0
    for rel, reason, has_skel_baseline in candidates:
        upath = os.path.join(home, rel)
        # os.path.isfile() follows symlinks, so links are rejected explicitly.
        if not os.path.isfile(upath) or os.path.islink(upath):
            continue
        if has_skel_baseline:
            skel_path = os.path.join(skel_dir, rel)
            if not _files_differ(upath, skel_path, max_bytes=max_compare_bytes):
                continue
        if _capture_file(
            bundle_dir=bundle_dir,
            role_name=role_name,
            abs_path=upath,
            reason=reason,
            policy=policy,
            path_filter=path_filter,
            managed_out=managed_out,
            excluded_out=excluded_out,
            seen_role=seen_role,
            seen_global=seen_global,
        ):
            captured += 1
    return captured
def _capture_link(
*,
role_name: str,
@ -1888,16 +1975,12 @@ def harvest(
users_role_seen = seen_by_role.setdefault(users_role_name, set())
skel_dir = "/etc/skel"
# Dotfiles to harvest for non-system users. For the common "skeleton"
# files, only capture if the user's copy differs from /etc/skel.
skel_dotfiles = [
(".bashrc", "user_shell_rc"),
(".profile", "user_profile"),
(".bash_logout", "user_shell_logout"),
]
extra_dotfiles = [
(".bash_aliases", "user_shell_aliases"),
]
auto_capture_user_dotfiles = bool(getattr(policy, "dangerous", False))
if user_records and not auto_capture_user_dotfiles:
users_notes.append(
"User shell dotfiles were not auto-harvested because --dangerous was not set; "
"use --dangerous for automatic shell-dotfile capture, or targeted --include-path patterns for safe-mode review."
)
user_flatpaks_map: Dict[str, List[Dict[str, Any]]] = {}
user_flatpak_remotes: List[Dict[str, Any]] = []
@ -1936,40 +2019,16 @@ def harvest(
seen_global=captured_global,
)
# Capture common per-user shell dotfiles when they differ from /etc/skel.
# These still go through IgnorePolicy and user path filters.
# Capture common per-user shell dotfiles only in dangerous mode. They
# often contain exported tokens or aliases/functions with embedded secrets.
home = (u.home or "").rstrip("/")
if home and home.startswith("/"):
for rel, reason in skel_dotfiles:
upath = os.path.join(home, rel)
if not os.path.exists(upath):
continue
skel_path = os.path.join(skel_dir, rel)
if not _files_differ(upath, skel_path, max_bytes=policy.max_file_bytes):
continue
_capture_file(
_capture_user_shell_dotfiles(
bundle_dir=bundle_dir,
role_name=users_role_name,
abs_path=upath,
reason=reason,
policy=policy,
path_filter=path_filter,
managed_out=users_managed,
excluded_out=users_excluded,
seen_role=users_role_seen,
seen_global=captured_global,
)
# Capture other common per-user shell files unconditionally if present.
for rel, reason in extra_dotfiles:
upath = os.path.join(home, rel)
if not os.path.exists(upath):
continue
_capture_file(
bundle_dir=bundle_dir,
role_name=users_role_name,
abs_path=upath,
reason=reason,
home=home,
skel_dir=skel_dir,
enabled=auto_capture_user_dotfiles,
policy=policy,
path_filter=path_filter,
managed_out=users_managed,
@ -1982,7 +2041,6 @@ def harvest(
# system-wide; ~/snap/* is user data, not an install source.
if u.flatpaks:
user_flatpaks_map[u.name] = [asdict(fp) for fp in u.flatpaks]
if home and home.startswith("/"):
user_flatpak_remotes.extend(
asdict(r) for r in find_user_flatpak_remotes(home, user=u.name)
)

View file

@ -1133,7 +1133,8 @@ def _manifest_from_bundle_dir(
group_names = sorted(group_set)
# SSH-related files (authorized_keys, known_hosts, config, etc.)
# User-managed files (authorized_keys plus dangerous-mode shell dotfiles).
# Keep the variable name for compatibility with existing generated data.
ssh_files: List[Dict[str, Any]] = []
for mf in managed_files:
dest = mf.get("path") or ""
@ -1280,7 +1281,7 @@ def _manifest_from_bundle_dir(
mode: "0700"
loop: "{{ users_users | default([]) }}"
- name: Deploy SSH-related files
- name: Deploy user-managed files
vars:
_enroll_ff:
files:

View file

@ -1056,5 +1056,78 @@ class TestCaptureFile:
assert len(managed) == 0
def test_user_shell_dotfiles_are_not_auto_captured_without_dangerous(tmp_path: Path):
    """With ``enabled=False`` nothing is captured, recorded, or written."""
    bundle_root = tmp_path / "bundle"
    alice_home = tmp_path / "home" / "alice"
    alice_home.mkdir(parents=True)

    # Both dotfiles exist in the home directory, so only the disabled flag can
    # explain an empty result.
    for filename, body in (
        (".bashrc", "export DEMO=value\n"),
        (".bash_aliases", "alias ll='ls -la'\n"),
    ):
        (alice_home / filename).write_text(body, encoding="utf-8")

    managed_files: list[ManagedFile] = []
    excluded_files: list[ExcludedFile] = []

    count = harvest._capture_user_shell_dotfiles(
        bundle_dir=str(bundle_root),
        role_name="users",
        home=str(alice_home),
        skel_dir=str(tmp_path / "skel"),
        enabled=False,
        policy=IgnorePolicy(dangerous=False),
        path_filter=PathFilter(),
        managed_out=managed_files,
        excluded_out=excluded_files,
        seen_role=set(),
        seen_global=set(),
    )

    assert count == 0
    assert managed_files == []
    assert excluded_files == []
    assert not (bundle_root / "artifacts" / "users").exists()
def test_user_shell_dotfiles_dangerous_captures_changed_files_only(tmp_path: Path):
    """When enabled, only changed regular dotfiles are captured.

    Expected outcome: the customised ``.bashrc`` and the ``.bash_aliases`` file
    are captured; the ``.profile`` identical to the skeleton copy and the
    symlinked ``.bash_logout`` are not.
    """
    skel_root = tmp_path / "skel"
    alice_home = tmp_path / "home" / "alice"
    for directory in (skel_root, alice_home):
        directory.mkdir(parents=True)

    symlink_target = alice_home / "target"
    fixtures = (
        (skel_root / ".bashrc", "# default bashrc\n"),
        (alice_home / ".bashrc", "# customised bashrc\n"),
        (skel_root / ".profile", "# default profile\n"),
        (alice_home / ".profile", "# default profile\n"),
        (alice_home / ".bash_aliases", "alias ll='ls -la'\n"),
        (symlink_target, "# symlink target\n"),
    )
    for fixture_path, body in fixtures:
        fixture_path.write_text(body, encoding="utf-8")
    os.symlink(symlink_target, alice_home / ".bash_logout")

    managed_files: list[ManagedFile] = []
    excluded_files: list[ExcludedFile] = []

    count = harvest._capture_user_shell_dotfiles(
        bundle_dir=str(tmp_path / "bundle"),
        role_name="users",
        home=str(alice_home),
        skel_dir=str(skel_root),
        enabled=True,
        policy=IgnorePolicy(dangerous=True),
        path_filter=PathFilter(),
        managed_out=managed_files,
        excluded_out=excluded_files,
        seen_role=set(),
        seen_global=set(),
    )

    harvested = {mf.path for mf in managed_files}
    assert count == 2
    assert str(alice_home / ".bashrc") in harvested
    assert str(alice_home / ".bash_aliases") in harvested
    assert str(alice_home / ".profile") not in harvested
    assert str(alice_home / ".bash_logout") not in harvested
    assert excluded_files == []
if __name__ == "__main__":
    # pytest.main() returns the run's exit code rather than exiting itself.
    # Propagate it so that running this file directly ends with a non-zero
    # status when any test fails, instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))